Просмотр исходного кода

Moved some constants to data.py.

Taddeus Kroes 14 лет назад
Родитель
Commit
fb2f6bea76

+ 1 - 0
src/.gitignore

@@ -1,2 +1,3 @@
 *.dat
 *.dat
+data/*
 results*.txt
 results*.txt

+ 0 - 31
src/LetterCropper.py

@@ -1,31 +0,0 @@
-from Rectangle import Rectangle
-
-class LetterCropper:
-
-    def __init__(self, threshold = 0.9):
-        self.threshold = threshold
-
-    def crop_to_letter(self, image):
-        self.image = image
-        self.determine_letter_bounds()
-        self.image.crop(self.letter_bounds)
-
-    def determine_letter_bounds(self):
-        min_x = self.image.width
-        max_x = 0
-        min_y = self.image.height
-        max_y = 0
-
-        for y, x, value in self.image:
-            if value < self.threshold:
-                if x < min_x: min_x = x
-                if y < min_y: min_y = y
-                if x > max_x: max_x = x
-                if y > max_y: max_y = y
-
-        self.letter_bounds = Rectangle(
-            min_x,
-            min_y,
-            max_x - min_x ,
-            max_y - min_y
-        )

+ 121 - 0
src/create_characters.py

@@ -0,0 +1,121 @@
+#!/usr/bin/python
+from os import listdir
+
+from GrayscaleImage import GrayscaleImage
+from NormalizedCharacterImage import NormalizedCharacterImage
+from Character import Character
+from data import IMAGES_FOLDER, exists, fload, fdump
+
+
+NORMALIZED_HEIGHT = 42
+
+
+def load_characters(neighbours, blur_scale, verbose=0):
+    """
+    Return the list of Character objects for the given parameters.
+
+    The list is read from the data file 'characters_<blur_scale>_
+    <neighbours>.dat' if that file exists. Otherwise a Character is created
+    for every image in every sub-folder of IMAGES_FOLDER (the sub-folder
+    name is used as the character value), normalized to NORMALIZED_HEIGHT
+    with the given blur scale, and the resulting list is saved to the data
+    file before it is returned.
+
+    Progress messages are only printed if 'verbose' is true, as in the
+    other loaders of this module.
+    """
+    chars_file = 'characters_%s_%s.dat' % (blur_scale, neighbours)
+
+    if exists(chars_file):
+        if verbose:
+            print 'Loading characters...'
+
+        return fload(chars_file)
+
+    if verbose:
+        print 'Going to generate character objects...'
+
+    chars = []
+
+    for char in sorted(listdir(IMAGES_FOLDER)):
+        char_folder = IMAGES_FOLDER + char + '/'
+
+        for filename in sorted(listdir(char_folder)):
+            image = GrayscaleImage(char_folder + filename)
+            norm = NormalizedCharacterImage(image, blur=blur_scale,
+                                            height=NORMALIZED_HEIGHT)
+            character = Character(char, [], norm)
+            # Called before saving - presumably this stores the feature
+            # vector on the character object; confirm against Character.
+            character.get_single_cell_feature_vector(neighbours)
+            chars.append(character)
+
+            if verbose:
+                print 'Loaded character %s' % char
+
+    if verbose:
+        print 'Saving characters...'
+
+    fdump(chars, chars_file)
+
+    return chars
+
+
+def load_learning_set(neighbours, blur_scale, verbose=0):
+    """
+    Return the learning set for the given parameters.
+
+    The set is read from the data file 'learning_set_<blur_scale>_
+    <neighbours>.dat' if that file exists. Otherwise both the learning set
+    and the test set are generated (and saved) by generate_sets(), and the
+    learning set is returned.
+    """
+    learning_set_file = 'learning_set_%s_%s.dat' % (blur_scale, neighbours)
+
+    if not exists(learning_set_file):
+        # generate_sets() returns the tuple (learning_set, test_set)
+        return generate_sets(neighbours, blur_scale, verbose=verbose)[0]
+
+    if verbose:
+        print 'Loading learning set...'
+
+    learning_set = fload(learning_set_file)
+
+    if verbose:
+        print 'Learning set:', [c.value for c in learning_set]
+
+    # Explicitly return the cached set, otherwise callers receive None
+    return learning_set
+
+
+def load_test_set(neighbours, blur_scale, verbose=0):
+    """
+    Return the test set for the given parameters.
+
+    The set is read from the data file 'test_set_<blur_scale>_
+    <neighbours>.dat' if that file exists. Otherwise both the learning set
+    and the test set are generated (and saved) by generate_sets(), and the
+    test set is returned.
+    """
+    test_set_file = 'test_set_%s_%s.dat' % (blur_scale, neighbours)
+
+    if not exists(test_set_file):
+        # generate_sets() returns the tuple (learning_set, test_set)
+        return generate_sets(neighbours, blur_scale, verbose=verbose)[1]
+
+    if verbose:
+        print 'Loading test set...'
+
+    test_set = fload(test_set_file)
+
+    if verbose:
+        print 'Test set:', [c.value for c in test_set]
+
+    # Explicitly return the cached set, otherwise callers receive None
+    return test_set
+
+
+def generate_sets(neighbours, blur_scale, verbose=0):
+    """
+    Split all characters into a learning set and a test set and save both.
+
+    The characters are obtained from load_characters(). For each character
+    value, the first 70 characters encountered go into the learning set and
+    all further characters with that value go into the test set. Both sets
+    are written to their data files ('learning_set_<blur_scale>_
+    <neighbours>.dat' and 'test_set_<blur_scale>_<neighbours>.dat').
+
+    Returns the tuple (learning_set, test_set).
+    """
+    suffix = '_%s_%s' % (blur_scale, neighbours)
+    learning_set_file = 'learning_set%s.dat' % suffix
+    test_set_file = 'test_set%s.dat' % suffix
+
+    chars = load_characters(neighbours, blur_scale, verbose=verbose)
+
+    if verbose:
+        print 'Going to generate learning set and test set...'
+
+    # Maximum number of characters per value that is put in the learning set
+    max_learned = 70
+    learning_set = []
+    test_set = []
+    # Number of characters in the learning set so far, per character value.
+    # A dictionary lookup replaces counting in a growing list on each
+    # iteration (assumes character values are hashable, e.g. the folder
+    # names used by load_characters()).
+    learned = {}
+
+    for char in chars:
+        count = learned.get(char.value, 0)
+
+        if count == max_learned:
+            test_set.append(char)
+        else:
+            learning_set.append(char)
+            learned[char.value] = count + 1
+
+    if verbose:
+        print 'Learning set:', [c.value for c in learning_set]
+        print '\nTest set:', [c.value for c in test_set]
+        print '\nSaving learning set...'
+
+    fdump(learning_set, learning_set_file)
+
+    if verbose:
+        print 'Saving test set...'
+
+    fdump(test_set, test_set_file)
+
+    return learning_set, test_set
+
+
+if __name__ == '__main__':
+    # Command-line entry point: make sure the data files exist for one
+    # combination of NEIGHBOURS and BLUR_SCALE.
+    from sys import argv, exit
+
+    if len(argv) <= 2:
+        print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
+        exit(1)
+
+    neighbours, blur_scale = int(argv[1]), float(argv[2])
+
+    # Loading each set generates the character file and the learning
+    # set/test set files if they do not exist yet
+    for load_set in (load_learning_set, load_test_set):
+        load_set(neighbours, blur_scale, verbose=1)

+ 48 - 0
src/create_classifier.py

@@ -0,0 +1,48 @@
+#!/usr/bin/python
+from Classifier import Classifier
+from create_characters import load_learning_set
+from data import exists, DATA_FOLDER
+
+
+def load_classifier(neighbours, blur_scale, c=None, gamma=None, verbose=0):
+    """
+    Return a classifier for the given parameters.
+
+    If the data file 'classifier_<blur_scale>_<neighbours>.dat' exists in
+    DATA_FOLDER, the classifier is loaded from that file. Otherwise, if both
+    the soft margin 'c' and 'gamma' are given, a new classifier is created
+    and trained with the learning set for these parameters.
+
+    Raises an Exception if no classifier file exists and 'c' or 'gamma' is
+    missing.
+    """
+    classifier_file = 'classifier_%s_%s.dat' % (blur_scale, neighbours)
+    # exists() prepends DATA_FOLDER itself, so it must be given the bare
+    # file name; only the Classifier constructor gets the full path.
+    classifier_path = DATA_FOLDER + classifier_file
+
+    if exists(classifier_file):
+        if verbose:
+            print 'Loading classifier...'
+
+        classifier = Classifier(filename=classifier_path, verbose=verbose)
+        classifier.neighbours = neighbours
+    elif c is not None and gamma is not None:
+        if verbose:
+            print 'Training new classifier...'
+
+        classifier = Classifier(c=c, gamma=gamma, neighbours=neighbours,
+                verbose=verbose)
+        learning_set = load_learning_set(neighbours, blur_scale,
+                verbose=verbose)
+        classifier.train(learning_set)
+        # NOTE(review): nothing here writes the trained classifier to
+        # classifier_path, so unless train() saves it the classifier is
+        # trained again on the next call - confirm against Classifier.
+    else:
+        raise Exception('No soft margin and gamma specified.')
+
+    return classifier
+
+
+if __name__ == '__main__':
+    # Command-line entry point: load the classifier for NEIGHBOURS and
+    # BLUR_SCALE, passing C and GAMMA along when both are given.
+    from sys import argv, exit
+
+    if len(argv) <= 2:
+        print 'Usage: python %s NEIGHBOURS BLUR_SCALE [ C GAMMA ]' % argv[0]
+        exit(1)
+
+    neighbours, blur_scale = int(argv[1]), float(argv[2])
+    options = {'verbose': 1}
+
+    # C and GAMMA are only used when both are present on the command line
+    if len(argv) >= 5:
+        options['c'] = float(argv[3])
+        options['gamma'] = float(argv[4])
+
+    # Generate the classifier file
+    load_classifier(neighbours, blur_scale, **options)

+ 31 - 0
src/data.py

@@ -0,0 +1,31 @@
+import os
+from cPickle import load, dump
+
+
+DATA_FOLDER = 'data/'
+IMAGES_FOLDER = '../images/LearningSet/'
+RESULTS_FOLDER = 'results/'
+
+
+def assert_data_folder_exists():
+    """
+    Create DATA_FOLDER if it does not exist yet.
+    """
+    if os.path.exists(DATA_FOLDER):
+        return
+
+    os.mkdir(DATA_FOLDER)
+
+
+def exists(filename):
+    """
+    Return whether 'filename' exists inside DATA_FOLDER.
+
+    'filename' is the name relative to DATA_FOLDER; the folder is prepended
+    here.
+    """
+    path = DATA_FOLDER + filename
+
+    return os.path.exists(path)
+
+
+def fload(filename):
+    """
+    Unpickle and return the object stored in 'filename' inside DATA_FOLDER.
+
+    Only use this for files written by fdump(): unpickling data from an
+    untrusted source can execute arbitrary code.
+    """
+    f = open(DATA_FOLDER + filename, 'r')
+
+    try:
+        return load(f)
+    finally:
+        # Close the file even if unpickling raises an error
+        f.close()
+
+
+def fdump(obj, filename):
+    """
+    Pickle 'obj' to 'filename' inside DATA_FOLDER.
+
+    DATA_FOLDER is created first if it does not exist yet.
+    """
+    assert_data_folder_exists()
+    f = open(DATA_FOLDER + filename, 'w+')
+
+    try:
+        dump(obj, f)
+    finally:
+        # Close the file even if pickling raises an error
+        f.close()

+ 9 - 61
src/find_svm_params.py

@@ -1,13 +1,10 @@
 #!/usr/bin/python
 #!/usr/bin/python
-from os import listdir
-from os.path import exists
-from cPickle import load, dump
+import os
 from sys import argv, exit
 from sys import argv, exit
 
 
-from GrayscaleImage import GrayscaleImage
-from NormalizedCharacterImage import NormalizedCharacterImage
-from Character import Character
 from Classifier import Classifier
 from Classifier import Classifier
+from data import DATA_FOLDER, RESULTS_FOLDER
+from create_characters import load_learning_set, load_test_set
 
 
 if len(argv) < 3:
 if len(argv) < 3:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
@@ -17,64 +14,15 @@ neighbours = int(argv[1])
 blur_scale = float(argv[2])
 blur_scale = float(argv[2])
 suffix = '_%s_%s' % (blur_scale, neighbours)
 suffix = '_%s_%s' % (blur_scale, neighbours)
 
 
-chars_file = 'characters%s.dat' % suffix
-learning_set_file = 'learning_set%s.dat' % suffix
-test_set_file = 'test_set%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
-results_file = 'results%s.txt' % suffix
-
-
-# Load characters
-if exists(chars_file):
-    print 'Loading characters...'
-    chars = load(open(chars_file, 'r'))
-else:
-    print 'Going to generate character objects...'
-    chars = []
-
-    for char in sorted(listdir('../images/LearningSet')):
-        for image in sorted(listdir('../images/LearningSet/' + char)):
-            f = '../images/LearningSet/' + char + '/' + image
-            image = GrayscaleImage(f)
-            norm = NormalizedCharacterImage(image, blur=blur_scale, height=42)
-            #imshow(norm.data, cmap='gray'); show()
-            character = Character(char, [], norm)
-            character.get_single_cell_feature_vector(neighbours)
-            chars.append(character)
-            print char
-
-    print 'Saving characters...'
-    dump(chars, open(chars_file, 'w+'))
+if not os.path.exists(RESULTS_FOLDER):
+    os.mkdir(RESULTS_FOLDER)
 
 
+classifier_file = DATA_FOLDER + 'classifier%s.dat' % suffix
+results_file = '%sresult%s.txt' % (RESULTS_FOLDER, suffix)
 
 
 # Load learning set and test set
 # Load learning set and test set
-if exists(learning_set_file):
-    print 'Loading learning set...'
-    learning_set = load(open(learning_set_file, 'r'))
-    print 'Learning set:', [c.value for c in learning_set]
-    print 'Loading test set...'
-    test_set = load(open(test_set_file, 'r'))
-    print 'Test set:', [c.value for c in test_set]
-else:
-    print 'Going to generate learning set and test set...'
-    learning_set = []
-    test_set = []
-    learned = []
-
-    for char in chars:
-        if learned.count(char.value) == 70:
-            test_set.append(char)
-        else:
-            learning_set.append(char)
-            learned.append(char.value)
-
-    print 'Learning set:', [c.value for c in learning_set]
-    print '\nTest set:', [c.value for c in test_set]
-    print '\nSaving learning set...'
-    dump(learning_set, file(learning_set_file, 'w+'))
-    print 'Saving test set...'
-    dump(test_set, file(test_set_file, 'w+'))
-
+learning_set = load_learning_set(neighbours, blur_scale, verbose=1)
+test_set = load_test_set(neighbours, blur_scale, verbose=1)
 
 
 # Perform a grid-search to find the optimal values for C and gamma
 # Perform a grid-search to find the optimal values for C and gamma
 C = [float(2 ** p) for p in xrange(-5, 16, 2)]
 C = [float(2 ** p) for p in xrange(-5, 16, 2)]

+ 0 - 29
src/load_characters.py

@@ -1,29 +0,0 @@
-#!/usr/bin/python
-from os import listdir
-from cPickle import dump
-from sys import argv, exit
-
-from GrayscaleImage import GrayscaleImage
-from NormalizedCharacterImage import NormalizedCharacterImage
-from Character import Character
-
-if len(argv) < 4:
-    print 'Usage: python %s FILE_SUFFIX BLUR_SCALE NEIGHBOURS' % argv[0]
-    exit(1)
-
-c = []
-
-for char in sorted(listdir('../images/LearningSet')):
-    for image in sorted(listdir('../images/LearningSet/' + char)):
-        f = '../images/LearningSet/' + char + '/' + image
-        image = GrayscaleImage(f)
-        norm = NormalizedCharacterImage(image, blur=float(argv[2]), height=42)
-        #from pylab import imshow, show
-        #imshow(norm.data, cmap='gray'); show()
-        character = Character(char, [], norm)
-        character.get_single_cell_feature_vector(int(argv[3]))
-        c.append(character)
-        print char
-
-print 'Saving characters...'
-dump(c, open('characters%s.dat' % argv[1], 'w+'))

+ 0 - 40
src/load_learning_set.py

@@ -1,40 +0,0 @@
-#!/usr/bin/python
-from cPickle import dump, load
-from sys import argv, exit
-
-if len(argv) < 2:
-    print 'Usage: python %s FILE_SUFFIX' % argv[0]
-    exit(1)
-
-print 'Loading characters...'
-chars = load(file('characters%s.dat' % argv[1], 'r'))
-learning_set = []
-test_set = []
-
-#s = {}
-#
-#for char in chars:
-#    if char.value not in s:
-#        s[char.value] = [char]
-#    else:
-#        s[char.value].append(char)
-#
-#for value, chars in s.iteritems():
-#    learning_set += chars[::2]
-#    test_set += chars[1::2]
-
-learned = []
-
-for char in chars:
-    if learned.count(char.value) == 70:
-        test_set.append(char)
-    else:
-        learning_set.append(char)
-        learned.append(char.value)
-
-print 'Learning set:', [c.value for c in learning_set]
-print '\nTest set:', [c.value for c in test_set]
-print '\nSaving learning set...'
-dump(learning_set, file('learning_set%s.dat' % argv[1], 'w+'))
-print 'Saving test set...'
-dump(test_set, file('test_set%s.dat' % argv[1], 'w+'))

+ 10 - 35
src/run_classifier.py

@@ -1,11 +1,10 @@
 #!/usr/bin/python
 #!/usr/bin/python
-from cPickle import load
 from sys import argv, exit
 from sys import argv, exit
-from pylab import imsave, plot, subplot, imshow, show, axis, title
+from pylab import subplot, imshow, show, axis, title
 from math import sqrt, ceil
 from math import sqrt, ceil
-import os
 
 
-from Classifier import Classifier
+from create_characters import load_test_set
+from create_classifier import load_classifier
 
 
 if len(argv) < 3:
 if len(argv) < 3:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
@@ -13,20 +12,17 @@ if len(argv) < 3:
 
 
 neighbours = int(argv[1])
 neighbours = int(argv[1])
 blur_scale = float(argv[2])
 blur_scale = float(argv[2])
-suffix = '_%s_%s' % (blur_scale, neighbours)
 
 
-test_set_file = 'test_set%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
+# Load classifier
+classifier = load_classifier(neighbours, blur_scale, verbose=1)
 
 
-print 'Loading classifier...'
-classifier = Classifier(filename=classifier_file)
-classifier.neighbours = neighbours
+# Load test set
+test_set = load_test_set(neighbours, blur_scale, verbose=1)
 
 
-print 'Loading test set...'
-test_set = load(file(test_set_file, 'r'))
+# Classify each character in the test set, remembering all faulty
+# classified characters
 l = len(test_set)
 l = len(test_set)
 matches = 0
 matches = 0
-#classified = {}
 classified = []
 classified = []
 
 
 for i, char in enumerate(test_set):
 for i, char in enumerate(test_set):
@@ -35,13 +31,6 @@ for i, char in enumerate(test_set):
     if char.value != prediction:
     if char.value != prediction:
         classified.append((char, prediction))
         classified.append((char, prediction))
 
 
-        #key = '%s_as_%s' % (char.value, prediction)
-
-        #if key not in classified:
-        #    classified[key] = [char]
-        #else:
-        #    classified[key].append(char)
-
         print '"%s" was classified as "%s"' \
         print '"%s" was classified as "%s"' \
                 % (char.value, prediction)
                 % (char.value, prediction)
     else:
     else:
@@ -50,8 +39,7 @@ for i, char in enumerate(test_set):
     print '%d of %d (%d%% done)' % (i + 1, l, round(100 * (i + 1) / l))
     print '%d of %d (%d%% done)' % (i + 1, l, round(100 * (i + 1) / l))
 
 
 print '\n%d matches (%d%%), %d fails' % (matches, \
 print '\n%d matches (%d%%), %d fails' % (matches, \
-        round(100 * matches / l), \
-        len(test_set) - matches)
+        round(100 * matches / l), len(test_set) - matches)
 
 
 # Show a grid plot of all faulty classified characters
 # Show a grid plot of all faulty classified characters
 print 'Plotting faulty classified characters...'
 print 'Plotting faulty classified characters...'
@@ -66,16 +54,3 @@ for i, pair in enumerate(classified):
     axis('off')
     axis('off')
 
 
 show()
 show()
-
-#print 'Saving faulty classified characters...'
-#folder = '../images/faulty/'
-#
-#if not os.path.exists(folder):
-#    os.mkdir(folder)
-#
-#for filename, chars in classified.iteritems():
-#    if len(chars) == 1:
-#        imsave('%s%s' % (folder, filename), char.image.data, cmap='gray')
-#    else:
-#        for i, char in enumerate(chars):
-#            imsave('%s%s_%d' % (folder, filename, i), char.image.data, cmap='gray')

+ 0 - 15
src/test_chars.py

@@ -1,15 +0,0 @@
-#!/usr/bin/python
-from pylab import subplot, show, imshow, axis
-from cPickle import load
-
-x, y = 25, 25
-chars = load(file('characters.dat', 'r'))[:(x * y)]
-
-for i in range(x):
-    for j in range(y):
-        index = j * x + i
-        subplot(x, y, index + 1)
-        axis('off')
-        imshow(chars[index].image.data, cmap='gray')
-
-show()

+ 0 - 70
src/test_compare.py

@@ -1,70 +0,0 @@
-#!/usr/bin/python
-from matplotlib.pyplot import imshow, subplot, show
-from LocalBinaryPatternizer import LocalBinaryPatternizer
-from cPickle import load
-from numpy import zeros
-
-chars = load(file('characters.dat', 'r'))[::2]
-left = None
-right = None
-
-s = {}
-
-for char in chars:
-    if char.value not in s:
-        s[char.value] = [char]
-    else:
-        s[char.value].append(char)
-
-left = s['F'][2].image
-right = s['A'][0].image
-
-size = 12
-
-d = (left.size[0] * 4, left.size[1] * 4)
-#GrayscaleImage.resize(left, d)
-#GrayscaleImage.resize(right, d)
-
-p1 = LocalBinaryPatternizer(left, size)
-h1 = p1.get_single_histogram()
-p1.create_features_vector()
-p1 = p1.features
-
-p2 = LocalBinaryPatternizer(right, size)
-h2 = p2.get_single_histogram()
-p2.create_features_vector()
-p2 = p2.features
-
-total_intersect = h1.intersect(h2)
-
-s = (len(p1), len(p1[0]))
-match = zeros(left.shape)
-m = 0
-
-for y in range(s[0]):
-    for x in range(s[1]):
-        h1 = p1[y][x]
-        h2 = p2[y][x]
-        intersect = h1.intersect(h2)
-        print intersect
-
-        for i in xrange(size):
-            for j in xrange(size):
-                try:
-                    match[y*size + i, x*size + j] = 1 - intersect
-                except IndexError:
-                    pass
-
-        m += intersect
-
-print 'Match: %d%%' % int(m / (s[0] * s[1]) * 100)
-print 'Single histogram instersection: %d%%' % int(total_intersect * 100)
-
-subplot(311)
-imshow(left.data, cmap='gray')
-subplot(312)
-imshow(match, cmap='gray')
-subplot(313)
-imshow(right.data, cmap='gray')
-
-show()

+ 0 - 10
src/test_gauss.py

@@ -1,10 +0,0 @@
-#!/usr/bin/python
-from GaussianFilter import GaussianFilter
-from GrayscaleImage import GrayscaleImage
-
-image = GrayscaleImage('../images/plate.png')
-
-filter = GaussianFilter(1.4)
-output_image = filter.get_filtered_copy(image)
-
-output_image.show()

+ 0 - 11
src/test_lettercropper.py

@@ -1,11 +0,0 @@
-#!/usr/bin/python
-from LetterCropper import LetterCropper
-from GrayscaleImage import GrayscaleImage
-
-image = GrayscaleImage("../images/test.png")
-
-cropper = LetterCropper(image)
-
-cropped_letter = cropper.get_cropped_letter()
-
-cropped_letter.show()

+ 0 - 0
src/test_combined.py → src/test_normalized_character.py


+ 8 - 20
src/test_performance.py

@@ -6,7 +6,8 @@ from time import time
 from GrayscaleImage import GrayscaleImage
 from GrayscaleImage import GrayscaleImage
 from NormalizedCharacterImage import NormalizedCharacterImage
 from NormalizedCharacterImage import NormalizedCharacterImage
 from Character import Character
 from Character import Character
-from Classifier import Classifier
+from data import IMAGES_FOLDER
+from create_classifier import load_classifier
 
 
 if len(argv) < 4:
 if len(argv) < 4:
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE COUNT' % argv[0]
     print 'Usage: python %s NEIGHBOURS BLUR_SCALE COUNT' % argv[0]
@@ -15,28 +16,15 @@ if len(argv) < 4:
 neighbours = int(argv[1])
 neighbours = int(argv[1])
 blur_scale = float(argv[2])
 blur_scale = float(argv[2])
 count = int(argv[3])
 count = int(argv[3])
-suffix = '_%s_%s' % (blur_scale, neighbours)
-
-#chars_file = 'characters%s.dat' % suffix
-classifier_file = 'classifier%s.dat' % suffix
-
-#print 'Loading characters...'
-#chars = load(open(chars_file, 'r'))[:count]
-#count = len(chars)
-#
-#for char in chars:
-#    del char.feature
-#
-#print 'Read %d characters' % count
 
 
 print 'Loading %d characters...' % count
 print 'Loading %d characters...' % count
 chars = []
 chars = []
 i = 0
 i = 0
 br = False
 br = False
 
 
-for value in sorted(listdir('../images/LearningSet')):
-    for image in sorted(listdir('../images/LearningSet/' + value)):
-        f = '../images/LearningSet/' + value + '/' + image
+for value in sorted(listdir(IMAGES_FOLDER)):  # listdir() requires a path
+    for image in sorted(listdir(IMAGES_FOLDER + value)):
+        f = IMAGES_FOLDER + value + '/' + image
         image = GrayscaleImage(f)
         image = GrayscaleImage(f)
         char = Character(value, [], image)
         char = Character(value, [], image)
         chars.append(char)
         chars.append(char)
@@ -49,10 +37,10 @@ for value in sorted(listdir('../images/LearningSet')):
     if br:
     if br:
         break
         break
 
 
-print 'Loading classifier...'
-classifier = Classifier(filename=classifier_file)
-classifier.neighbours = neighbours
+# Load classifier
+classifier = load_classifier(neighbours, blur_scale, verbose=1)
 
 
+# Measure the time it takes to recognize <count> characters
 start = time()
 start = time()
 
 
 for char in chars:
 for char in chars: