Commit fb2f6bea authored by Taddeus Kroes

Moved some constants to data.py.

parent 8e465dae
 *.dat
+data/*
 results*.txt
from Rectangle import Rectangle


class LetterCropper:
    def __init__(self, threshold=0.9):
        self.threshold = threshold

    def crop_to_letter(self, image):
        self.image = image
        self.determine_letter_bounds()
        self.image.crop(self.letter_bounds)

    def determine_letter_bounds(self):
        # Find the bounding box of all pixels darker than the threshold
        min_x = self.image.width
        max_x = 0
        min_y = self.image.height
        max_y = 0

        for y, x, value in self.image:
            if value < self.threshold:
                if x < min_x: min_x = x
                if y < min_y: min_y = y
                if x > max_x: max_x = x
                if y > max_y: max_y = y

        self.letter_bounds = Rectangle(
            min_x,
            min_y,
            max_x - min_x,
            max_y - min_y
        )
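To make the thresholding logic above concrete, here is a minimal standalone sketch of what determine_letter_bounds computes, using a hypothetical 4x5 grid of brightness values in place of a GrayscaleImage (values below the threshold count as ink):

threshold = 0.9
toy_image = [
    [1.0, 1.0, 1.0, 1.0, 1.0],
    [1.0, 0.2, 0.3, 1.0, 1.0],
    [1.0, 0.1, 1.0, 1.0, 1.0],
    [1.0, 1.0, 1.0, 1.0, 1.0],
]

# Collect the coordinates of all 'ink' pixels
ink = [(x, y) for y, row in enumerate(toy_image)
              for x, value in enumerate(row) if value < threshold]

min_x = min(x for x, y in ink)  # 1
min_y = min(y for x, y in ink)  # 1
max_x = max(x for x, y in ink)  # 2
max_y = max(y for x, y in ink)  # 2

# Matches Rectangle(min_x, min_y, max_x - min_x, max_y - min_y) above
print (min_x, min_y, max_x - min_x, max_y - min_y)  # (1, 1, 1, 1)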
#!/usr/bin/python
from os import listdir

from GrayscaleImage import GrayscaleImage
from NormalizedCharacterImage import NormalizedCharacterImage
from Character import Character
from data import IMAGES_FOLDER, exists, fload, fdump

NORMALIZED_HEIGHT = 42


def load_characters(neighbours, blur_scale, verbose=0):
    chars_file = 'characters_%s_%s.dat' % (blur_scale, neighbours)

    if exists(chars_file):
        if verbose:
            print 'Loading characters...'

        chars = fload(chars_file)
    else:
        if verbose:
            print 'Going to generate character objects...'

        chars = []

        for char in sorted(listdir(IMAGES_FOLDER)):
            for filename in sorted(listdir(IMAGES_FOLDER + char)):
                image = GrayscaleImage(IMAGES_FOLDER + char + '/' + filename)
                norm = NormalizedCharacterImage(image, blur=blur_scale, \
                        height=NORMALIZED_HEIGHT)
                character = Character(char, [], norm)
                character.get_single_cell_feature_vector(neighbours)
                chars.append(character)

            if verbose:
                print 'Loaded character %s' % char

        if verbose:
            print 'Saving characters...'

        fdump(chars, chars_file)

    return chars


def load_learning_set(neighbours, blur_scale, verbose=0):
    learning_set_file = 'learning_set_%s_%s.dat' % (blur_scale, neighbours)

    if not exists(learning_set_file):
        return generate_sets(neighbours, blur_scale, verbose=verbose)[0]

    if verbose:
        print 'Loading learning set...'

    learning_set = fload(learning_set_file)

    if verbose:
        print 'Learning set:', [c.value for c in learning_set]

    return learning_set


def load_test_set(neighbours, blur_scale, verbose=0):
    test_set_file = 'test_set_%s_%s.dat' % (blur_scale, neighbours)

    if not exists(test_set_file):
        return generate_sets(neighbours, blur_scale, verbose=verbose)[1]

    if verbose:
        print 'Loading test set...'

    test_set = fload(test_set_file)

    if verbose:
        print 'Test set:', [c.value for c in test_set]

    return test_set


def generate_sets(neighbours, blur_scale, verbose=0):
    suffix = '_%s_%s' % (blur_scale, neighbours)
    learning_set_file = 'learning_set%s.dat' % suffix
    test_set_file = 'test_set%s.dat' % suffix

    chars = load_characters(neighbours, blur_scale, verbose=verbose)

    if verbose:
        print 'Going to generate learning set and test set...'

    learning_set = []
    test_set = []
    learned = []

    # Keep the first 70 occurrences of each character value for learning;
    # any further occurrences go to the test set
    for char in chars:
        if learned.count(char.value) == 70:
            test_set.append(char)
        else:
            learning_set.append(char)
            learned.append(char.value)

    if verbose:
        print 'Learning set:', [c.value for c in learning_set]
        print '\nTest set:', [c.value for c in test_set]
        print '\nSaving learning set...'

    fdump(learning_set, learning_set_file)

    if verbose:
        print 'Saving test set...'

    fdump(test_set, test_set_file)

    return learning_set, test_set


if __name__ == '__main__':
    from sys import argv, exit

    if len(argv) < 3:
        print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
        exit(1)

    neighbours = int(argv[1])
    blur_scale = float(argv[2])

    # Generate the character file and the learning set/test set files
    load_learning_set(neighbours, blur_scale, verbose=1)
    load_test_set(neighbours, blur_scale, verbose=1)
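The functions above cache their results under DATA_FOLDER, so other scripts can call them without worrying about regeneration. A small usage sketch (the parameter values are illustrative only):

from create_characters import load_learning_set, load_test_set

# Reads data/learning_set_1.5_5.dat and data/test_set_1.5_5.dat if they
# exist; otherwise generates and caches them from the character images
learning_set = load_learning_set(5, 1.5, verbose=1)
test_set = load_test_set(5, 1.5, verbose=1)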
#!/usr/bin/python
from Classifier import Classifier
from create_characters import load_learning_set
from data import exists, DATA_FOLDER


def load_classifier(neighbours, blur_scale, c=None, gamma=None, verbose=0):
    # data.exists() prepends DATA_FOLDER itself, so keep a relative name
    # for the existence check and a full path for the Classifier
    classifier_name = 'classifier_%s_%s.dat' % (blur_scale, neighbours)
    classifier_file = DATA_FOLDER + classifier_name

    if exists(classifier_name):
        if verbose:
            print 'Loading classifier...'

        classifier = Classifier(filename=classifier_file, verbose=verbose)
        classifier.neighbours = neighbours
    elif c is not None and gamma is not None:
        if verbose:
            print 'Training new classifier...'

        classifier = Classifier(c=c, gamma=gamma, neighbours=neighbours, \
                verbose=verbose)
        learning_set = load_learning_set(neighbours, blur_scale, \
                verbose=verbose)
        classifier.train(learning_set)
    else:
        raise Exception('No soft margin and gamma specified.')

    return classifier


if __name__ == '__main__':
    from sys import argv, exit

    if len(argv) < 3:
        print 'Usage: python %s NEIGHBOURS BLUR_SCALE [ C GAMMA ]' % argv[0]
        exit(1)

    neighbours = int(argv[1])
    blur_scale = float(argv[2])

    # Generate the classifier file
    if len(argv) > 4:
        c = float(argv[3])
        gamma = float(argv[4])
        load_classifier(neighbours, blur_scale, c=c, gamma=gamma, verbose=1)
    else:
        load_classifier(neighbours, blur_scale, verbose=1)
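As above, a short usage sketch; the C and gamma values shown are placeholders, not tuned parameters:

from create_classifier import load_classifier

# Trains (and caches) a classifier when no classifier file exists yet;
# c and gamma are only required for that first training run
classifier = load_classifier(5, 1.5, c=32.0, gamma=0.125, verbose=1)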
import os
from cPickle import load, dump

DATA_FOLDER = 'data/'
IMAGES_FOLDER = '../images/LearningSet/'
RESULTS_FOLDER = 'results/'


def assert_data_folder_exists():
    if not os.path.exists(DATA_FOLDER):
        os.mkdir(DATA_FOLDER)


def exists(filename):
    return os.path.exists(DATA_FOLDER + filename)


def fload(filename):
    f = open(DATA_FOLDER + filename, 'r')
    l = load(f)
    f.close()

    return l


def fdump(obj, filename):
    assert_data_folder_exists()
    f = open(DATA_FOLDER + filename, 'w+')
    dump(obj, f)
    f.close()
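The helpers take file names relative to DATA_FOLDER, so callers never build paths themselves. A quick round-trip check using only the cPickle-based fload/fdump defined above:

from data import exists, fload, fdump

fdump({'blur_scale': 1.5, 'neighbours': 5}, 'example.dat')

if exists('example.dat'):
    print fload('example.dat')  # {'blur_scale': 1.5, 'neighbours': 5}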
 #!/usr/bin/python
-from os import listdir
-from os.path import exists
-from cPickle import load, dump
+import os
 from sys import argv, exit
-from GrayscaleImage import GrayscaleImage
-from NormalizedCharacterImage import NormalizedCharacterImage
-from Character import Character
 from Classifier import Classifier
+from data import DATA_FOLDER, RESULTS_FOLDER
+from create_characters import load_learning_set, load_test_set

 if len(argv) < 3:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]

@@ -17,64 +14,15 @@ neighbours = int(argv[1])
 blur_scale = float(argv[2])

 suffix = '_%s_%s' % (blur_scale, neighbours)
-chars_file = 'characters%s.dat' % suffix
-learning_set_file = 'learning_set%s.dat' % suffix
-test_set_file = 'test_set%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
-results_file = 'results%s.txt' % suffix
-
-# Load characters
-if exists(chars_file):
-    print 'Loading characters...'
-    chars = load(open(chars_file, 'r'))
-else:
-    print 'Going to generate character objects...'
-    chars = []
-
-    for char in sorted(listdir('../images/LearningSet')):
-        for image in sorted(listdir('../images/LearningSet/' + char)):
-            f = '../images/LearningSet/' + char + '/' + image
-            image = GrayscaleImage(f)
-            norm = NormalizedCharacterImage(image, blur=blur_scale, height=42)
-            #imshow(norm.data, cmap='gray'); show()
-            character = Character(char, [], norm)
-            character.get_single_cell_feature_vector(neighbours)
-            chars.append(character)
-        print char
-
-    print 'Saving characters...'
-    dump(chars, open(chars_file, 'w+'))
+if not os.path.exists(RESULTS_FOLDER):
+    os.mkdir(RESULTS_FOLDER)
+
+classifier_file = DATA_FOLDER + 'classifier%s.dat' % suffix
+results_file = '%sresult%s.txt' % (RESULTS_FOLDER, suffix)

 # Load learning set and test set
-if exists(learning_set_file):
-    print 'Loading learning set...'
-    learning_set = load(open(learning_set_file, 'r'))
-    print 'Learning set:', [c.value for c in learning_set]
-
-    print 'Loading test set...'
-    test_set = load(open(test_set_file, 'r'))
-    print 'Test set:', [c.value for c in test_set]
-else:
-    print 'Going to generate learning set and test set...'
-    learning_set = []
-    test_set = []
-    learned = []
-
-    for char in chars:
-        if learned.count(char.value) == 70:
-            test_set.append(char)
-        else:
-            learning_set.append(char)
-            learned.append(char.value)
-
-    print 'Learning set:', [c.value for c in learning_set]
-    print '\nTest set:', [c.value for c in test_set]
-
-    print '\nSaving learning set...'
-    dump(learning_set, file(learning_set_file, 'w+'))
-
-    print 'Saving test set...'
-    dump(test_set, file(test_set_file, 'w+'))
+learning_set = load_learning_set(neighbours, blur_scale, verbose=1)
+test_set = load_test_set(neighbours, blur_scale, verbose=1)

 # Perform a grid-search to find the optimal values for C and gamma
 C = [float(2 ** p) for p in xrange(-5, 16, 2)]
...
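The remainder of the grid search is cut off in this diff. For reference, a hypothetical sketch of such a search over the C range defined above and a matching gamma range; this is not the author's exact code, and the Classifier train/classify interface used here is an assumption based on create_classifier.py:

gamma = [float(2 ** p) for p in xrange(-15, 4, 2)]
best = None

for c_value in C:
    for gamma_value in gamma:
        classifier = Classifier(c=c_value, gamma=gamma_value, \
                neighbours=neighbours)
        classifier.train(learning_set)

        # classify() returning the predicted character value is an
        # assumption about the Classifier API
        score = sum(1 for char in test_set
                if classifier.classify(char) == char.value)

        if best is None or score > best[0]:
            best = (score, c_value, gamma_value)

print 'Best: C = %s, gamma = %s (%d matches)' % (best[1], best[2], best[0])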
#!/usr/bin/python
from os import listdir
from cPickle import dump
from sys import argv, exit

from GrayscaleImage import GrayscaleImage
from NormalizedCharacterImage import NormalizedCharacterImage
from Character import Character

if len(argv) < 4:
    print 'Usage: python %s FILE_SUFFIX BLUR_SCALE NEIGHBOURS' % argv[0]
    exit(1)

c = []

for char in sorted(listdir('../images/LearningSet')):
    for image in sorted(listdir('../images/LearningSet/' + char)):
        f = '../images/LearningSet/' + char + '/' + image
        image = GrayscaleImage(f)
        norm = NormalizedCharacterImage(image, blur=float(argv[2]), height=42)
        #from pylab import imshow, show
        #imshow(norm.data, cmap='gray'); show()
        character = Character(char, [], norm)
        character.get_single_cell_feature_vector(int(argv[3]))
        c.append(character)

    print char

print 'Saving characters...'
dump(c, open('characters%s.dat' % argv[1], 'w+'))
#!/usr/bin/python
from cPickle import dump, load
from sys import argv, exit

if len(argv) < 2:
    print 'Usage: python %s FILE_SUFFIX' % argv[0]
    exit(1)

print 'Loading characters...'
chars = load(file('characters%s.dat' % argv[1], 'r'))

learning_set = []
test_set = []

#s = {}
#
#for char in chars:
#    if char.value not in s:
#        s[char.value] = [char]
#    else:
#        s[char.value].append(char)
#
#for value, chars in s.iteritems():
#    learning_set += chars[::2]
#    test_set += chars[1::2]

learned = []

for char in chars:
    if learned.count(char.value) == 70:
        test_set.append(char)
    else:
        learning_set.append(char)
        learned.append(char.value)

print 'Learning set:', [c.value for c in learning_set]
print '\nTest set:', [c.value for c in test_set]

print '\nSaving learning set...'
dump(learning_set, file('learning_set%s.dat' % argv[1], 'w+'))

print 'Saving test set...'
dump(test_set, file('test_set%s.dat' % argv[1], 'w+'))
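The split logic above (also used by generate_sets in create_characters.py) keeps the first 70 occurrences of each character value for learning and moves any surplus to the test set. A toy illustration of the same loop with a cap of 2 instead of 70:

values = ['A', 'A', 'A', 'B', 'A', 'B', 'B']
learning_set, test_set, learned = [], [], []

for value in values:
    if learned.count(value) == 2:
        test_set.append(value)
    else:
        learning_set.append(value)
        learned.append(value)

print learning_set  # ['A', 'A', 'B', 'B']
print test_set      # ['A', 'A', 'B']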
 #!/usr/bin/python
-from cPickle import load
 from sys import argv, exit
-from pylab import imsave, plot, subplot, imshow, show, axis, title
+from pylab import subplot, imshow, show, axis, title
 from math import sqrt, ceil
-import os

-from Classifier import Classifier
+from create_characters import load_test_set
+from create_classifier import load_classifier

 if len(argv) < 3:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]

@@ -13,20 +12,17 @@ if len(argv) < 3:
 neighbours = int(argv[1])
 blur_scale = float(argv[2])

-suffix = '_%s_%s' % (blur_scale, neighbours)
-test_set_file = 'test_set%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
-
-print 'Loading classifier...'
-classifier = Classifier(filename=classifier_file)
-classifier.neighbours = neighbours
-
-print 'Loading test set...'
-test_set = load(file(test_set_file, 'r'))
+# Load classifier
+classifier = load_classifier(neighbours, blur_scale, verbose=1)
+
+# Load test set
+test_set = load_test_set(neighbours, blur_scale, verbose=1)

+# Classify each character in the test set, remembering all faulty
+# classified characters
 l = len(test_set)
 matches = 0
-#classified = {}
 classified = []

 for i, char in enumerate(test_set):
@@ -35,13 +31,6 @@ for i, char in enumerate(test_set):
     if char.value != prediction:
         classified.append((char, prediction))
-        #key = '%s_as_%s' % (char.value, prediction)
-
-        #if key not in classified:
-        #    classified[key] = [char]
-        #else:
-        #    classified[key].append(char)

         print '"%s" was classified as "%s"' \
                 % (char.value, prediction)
     else:
@@ -50,8 +39,7 @@ for i, char in enumerate(test_set):
     print '%d of %d (%d%% done)' % (i + 1, l, round(100 * (i + 1) / l))

 print '\n%d matches (%d%%), %d fails' % (matches, \
-        round(100 * matches / l), \
-        len(test_set) - matches)
+        round(100 * matches / l), len(test_set) - matches)

 # Show a grid plot of all faulty classified characters
 print 'Plotting faulty classified characters...'

@@ -66,16 +54,3 @@ for i, pair in enumerate(classified):
     axis('off')

 show()
-
-#print 'Saving faulty classified characters...'
-#folder = '../images/faulty/'
-#
-#if not os.path.exists(folder):
-#    os.mkdir(folder)
-#
-#for filename, chars in classified.iteritems():
-#    if len(chars) == 1:
-#        imsave('%s%s' % (folder, filename), char.image.data, cmap='gray')
-#    else:
-#        for i, char in enumerate(chars):
-#            imsave('%s%s_%d' % (folder, filename, i), char.image.data, cmap='gray')
#!/usr/bin/python
from pylab import subplot, show, imshow, axis
from cPickle import load

x, y = 25, 25
chars = load(file('characters.dat', 'r'))[:(x * y)]

for i in range(x):
    for j in range(y):
        index = j * x + i
        subplot(x, y, index + 1)
        axis('off')
        imshow(chars[index].image.data, cmap='gray')

show()
#!/usr/bin/python
from matplotlib.pyplot import imshow, subplot, show
from LocalBinaryPatternizer import LocalBinaryPatternizer
from cPickle import load
from numpy import zeros

chars = load(file('characters.dat', 'r'))[::2]

left = None
right = None
s = {}

# Group the characters by value
for char in chars:
    if char.value not in s:
        s[char.value] = [char]
    else:
        s[char.value].append(char)

left = s['F'][2].image
right = s['A'][0].image

size = 12
d = (left.size[0] * 4, left.size[1] * 4)
#GrayscaleImage.resize(left, d)
#GrayscaleImage.resize(right, d)

p1 = LocalBinaryPatternizer(left, size)
h1 = p1.get_single_histogram()
p1.create_features_vector()
p1 = p1.features

p2 = LocalBinaryPatternizer(right, size)
h2 = p2.get_single_histogram()
p2.create_features_vector()
p2 = p2.features

total_intersect = h1.intersect(h2)

# Compare the two feature grids cell by cell, painting each cell of the
# 'match' image with its histogram intersection score
s = (len(p1), len(p1[0]))
match = zeros(left.shape)
m = 0

for y in range(s[0]):
    for x in range(s[1]):
        h1 = p1[y][x]
        h2 = p2[y][x]
        intersect = h1.intersect(h2)
        print intersect

        for i in xrange(size):
            for j in xrange(size):
                try:
                    match[y * size + i, x * size + j] = 1 - intersect
                except IndexError:
                    pass

        m += intersect

print 'Match: %d%%' % int(m / (s[0] * s[1]) * 100)
print 'Single histogram intersection: %d%%' % int(total_intersect * 100)

subplot(311)
imshow(left.data, cmap='gray')
subplot(312)
imshow(match, cmap='gray')
subplot(313)
imshow(right.data, cmap='gray')
show()
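The intersect() calls above come from the project's own histogram class, but the underlying measure is simple: the intersection of two normalized histograms is the sum of their bin-wise minima. A standalone equivalent for plain lists (an illustration, not the project's implementation):

def intersection(h1, h2):
    # 1.0 for identical normalized histograms, 0.0 for disjoint ones
    return sum(min(a, b) for a, b in zip(h1, h2))

print intersection([0.5, 0.5, 0.0], [0.25, 0.25, 0.5])  # 0.5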
#!/usr/bin/python
from GaussianFilter import GaussianFilter
from GrayscaleImage import GrayscaleImage

image = GrayscaleImage('../images/plate.png')
gaussian_filter = GaussianFilter(1.4)
output_image = gaussian_filter.get_filtered_copy(image)
output_image.show()
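For intuition on the blur parameter: a Gaussian filter weights each neighbour by exp(-x^2 / (2 * sigma^2)), normalized to sum to one. A quick standalone look at the discrete 1D weights for sigma = 1.4 (illustrative only; GaussianFilter's actual kernel construction is not shown in this commit):

from math import exp

sigma = 1.4
radius = 3
weights = [exp(-x * x / (2.0 * sigma ** 2)) \
        for x in range(-radius, radius + 1)]
total = sum(weights)

print [round(w / total, 3) for w in weights]
# [0.029, 0.104, 0.223, 0.288, 0.223, 0.104, 0.029]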
#!/usr/bin/python
from LetterCropper import LetterCropper
from GrayscaleImage import GrayscaleImage

image = GrayscaleImage("../images/test.png")
cropper = LetterCropper(threshold=0.9)
cropper.crop_to_letter(image)

# crop_to_letter() crops the image in place, so show the cropped result
cropper.image.show()
@@ -6,7 +6,8 @@ from time import time
 from GrayscaleImage import GrayscaleImage
 from NormalizedCharacterImage import NormalizedCharacterImage
 from Character import Character
-from Classifier import Classifier
+from data import IMAGES_FOLDER
+from create_classifier import load_classifier

 if len(argv) < 4:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE COUNT' % argv[0]

@@ -15,28 +16,15 @@ if len(argv) < 4:
 neighbours = int(argv[1])
 blur_scale = float(argv[2])
 count = int(argv[3])

-suffix = '_%s_%s' % (blur_scale, neighbours)
-#chars_file = 'characters%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
-
-#print 'Loading characters...'
-#chars = load(open(chars_file, 'r'))[:count]
-#count = len(chars)
-#
-#for char in chars:
-#    del char.feature
-#
-#print 'Read %d characters' % count
-
 print 'Loading %d characters...' % count
 chars = []
 i = 0
 br = False

-for value in sorted(listdir('../images/LearningSet')):
-    for image in sorted(listdir('../images/LearningSet/' + value)):
-        f = '../images/LearningSet/' + value + '/' + image
+for value in sorted(listdir(IMAGES_FOLDER)):
+    for image in sorted(listdir(IMAGES_FOLDER + value)):
+        f = IMAGES_FOLDER + value + '/' + image
         image = GrayscaleImage(f)
         char = Character(value, [], image)
         chars.append(char)

@@ -49,10 +37,10 @@ for value in sorted(listdir('../images/LearningSet')):
     if br:
         break

-print 'Loading classifier...'
-classifier = Classifier(filename=classifier_file)
-classifier.neighbours = neighbours
+# Load classifier
+classifier = load_classifier(neighbours, blur_scale, verbose=1)

+# Measure the time it takes to recognize <count> characters
 start = time()

 for char in chars:
...
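The body of the timing loop is also truncated in this diff. For completeness, a hypothetical sketch of how such a measurement could conclude; the classify() call is an assumption about the Classifier API, not the committed code:

start = time()

for char in chars:
    classifier.classify(char)

elapsed = time() - start
print 'Classified %d characters in %.2fs (%.1f ms per character)' \
        % (count, elapsed, 1000.0 * elapsed / count)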