|
|
@@ -0,0 +1,121 @@
|
|
|
+#!/usr/bin/python
|
|
|
+from os import listdir
|
|
|
+
|
|
|
+from GrayscaleImage import GrayscaleImage
|
|
|
+from NormalizedCharacterImage import NormalizedCharacterImage
|
|
|
+from Character import Character
|
|
|
+from data import IMAGES_FOLDER, exists, fload, fdump
|
|
|
+
|
|
|
+
|
|
|
+NORMALIZED_HEIGHT = 42
|
|
|
+
|
|
|
+
|
|
|
def load_characters(neighbours, blur_scale, verbose=0):
    """Return the list of Character objects for the given parameters.

    The list is cached in a data file whose name is derived from
    ``blur_scale`` and ``neighbours``. If ``exists`` reports that file, it is
    read back with ``fload``. Otherwise a Character is built for every entry
    found below ``IMAGES_FOLDER`` and the resulting list is written with
    ``fdump`` before it is returned.

    Arguments:
    neighbours -- passed to Character.get_single_cell_feature_vector()
    blur_scale -- passed as ``blur`` to NormalizedCharacterImage
    verbose    -- when true, progress messages are printed
    """
    chars_file = 'characters_%s_%s.dat' % (blur_scale, neighbours)

    if exists(chars_file):
        # Status output is only produced on request, like in the other
        # loaders of this module.
        if verbose:
            print('Loading characters...')

        return fload(chars_file)

    if verbose:
        print('Going to generate character objects...')

    chars = []

    # Each entry of IMAGES_FOLDER names a character; the entries inside it
    # are the images of that character. Sorting keeps the order stable.
    for char in sorted(listdir(IMAGES_FOLDER)):
        # IMAGES_FOLDER is used as a plain string prefix, so it has to end
        # in a path separator.
        char_folder = IMAGES_FOLDER + char

        for filename in sorted(listdir(char_folder)):
            image = GrayscaleImage(char_folder + '/' + filename)
            norm = NormalizedCharacterImage(image, blur=blur_scale,
                                            height=NORMALIZED_HEIGHT)
            character = Character(char, [], norm)
            # Called for its effect on the character object; the return
            # value is not used here.
            character.get_single_cell_feature_vector(neighbours)
            chars.append(character)

        # NOTE(review): progress is reported once per character folder --
        # confirm this is the intended granularity (vs. once per image).
        if verbose:
            print('Loaded character %s' % char)

    if verbose:
        print('Saving characters...')

    fdump(chars, chars_file)

    return chars
|
|
|
+
|
|
|
+
|
|
|
def load_learning_set(neighbours, blur_scale, verbose=0):
    """Return the learning set for the given parameters.

    The set is read with ``fload`` from a data file named after
    ``blur_scale`` and ``neighbours`` when ``exists`` reports that file.
    Otherwise ``generate_sets`` is called and the first element of its
    result (the learning set) is returned.

    Arguments:
    neighbours -- forwarded to generate_sets() and part of the file name
    blur_scale -- forwarded to generate_sets() and part of the file name
    verbose    -- when true, progress messages are printed
    """
    learning_set_file = 'learning_set_%s_%s.dat' % (blur_scale, neighbours)

    if not exists(learning_set_file):
        return generate_sets(neighbours, blur_scale, verbose=verbose)[0]

    if verbose:
        print('Loading learning set...')

    learning_set = fload(learning_set_file)

    if verbose:
        # Single-argument print form: same output under Python 2 and 3.
        print('Learning set: %s' % ([c.value for c in learning_set],))

    # The loaded set has to be handed back explicitly; without this the
    # cached path would return None.
    return learning_set
|
|
|
+
|
|
|
+
|
|
|
def load_test_set(neighbours, blur_scale, verbose=0):
    """Return the test set for the given parameters.

    The set is read with ``fload`` from a data file named after
    ``blur_scale`` and ``neighbours`` when ``exists`` reports that file.
    Otherwise ``generate_sets`` is called and the second element of its
    result (the test set) is returned.

    Arguments:
    neighbours -- forwarded to generate_sets() and part of the file name
    blur_scale -- forwarded to generate_sets() and part of the file name
    verbose    -- when true, progress messages are printed
    """
    test_set_file = 'test_set_%s_%s.dat' % (blur_scale, neighbours)

    if not exists(test_set_file):
        return generate_sets(neighbours, blur_scale, verbose=verbose)[1]

    if verbose:
        print('Loading test set...')

    test_set = fload(test_set_file)

    if verbose:
        # Single-argument print form: same output under Python 2 and 3.
        print('Test set: %s' % ([c.value for c in test_set],))

    # The loaded set has to be handed back explicitly; without this the
    # cached path would return None.
    return test_set
|
|
|
+
|
|
|
+
|
|
|
def generate_sets(neighbours, blur_scale, verbose=0):
    """Split all characters into a learning set and a test set and save both.

    The characters come from ``load_characters``. Walking them in order, the
    first 70 characters of every distinct ``value`` go into the learning
    set; any further character with that value goes into the test set. Both
    lists are written with ``fdump`` to files named after ``blur_scale`` and
    ``neighbours``.

    Arguments:
    neighbours -- forwarded to load_characters() and part of the file names
    blur_scale -- forwarded to load_characters() and part of the file names
    verbose    -- when true, progress messages are printed

    Returns the tuple (learning_set, test_set).
    """
    suffix = '_%s_%s' % (blur_scale, neighbours)
    learning_set_file = 'learning_set%s.dat' % suffix
    test_set_file = 'test_set%s.dat' % suffix

    chars = load_characters(neighbours, blur_scale, verbose=verbose)

    if verbose:
        print('Going to generate learning set and test set...')

    # Maximum number of characters per value that end up in the learning set.
    max_learned = 70

    learning_set = []
    test_set = []

    # Number of characters already placed in the learning set, per value.
    # A dictionary lookup avoids rescanning a growing list for every
    # character. Assumes char.value is hashable (presumably the character
    # string) -- TODO confirm against the Character class.
    learned = {}

    for char in chars:
        count = learned.get(char.value, 0)

        if count >= max_learned:
            test_set.append(char)
        else:
            learning_set.append(char)
            learned[char.value] = count + 1

    if verbose:
        # Single-argument print form: same output under Python 2 and 3.
        print('Learning set: %s' % ([c.value for c in learning_set],))
        print('\nTest set: %s' % ([c.value for c in test_set],))
        print('\nSaving learning set...')

    fdump(learning_set, learning_set_file)

    if verbose:
        print('Saving test set...')

    fdump(test_set, test_set_file)

    return learning_set, test_set
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    import sys

    # Both NEIGHBOURS and BLUR_SCALE have to be given on the command line.
    if len(sys.argv) < 3:
        print('Usage: python %s NEIGHBOURS BLUR_SCALE' % sys.argv[0])
        sys.exit(1)

    neighbours, blur_scale = int(sys.argv[1]), float(sys.argv[2])

    # Generate the character file and the learning set/test set files by
    # loading the learning set first and the test set second.
    for load_set in (load_learning_set, load_test_set):
        load_set(neighbours, blur_scale, verbose=1)
|