#!/usr/bin/python
# Source repository: taddeus/licenseplates
from os import listdir
from os.path import exists
from cPickle import load, dump
from sys import argv, exit

from GrayscaleImage import GrayscaleImage
from NormalizedCharacterImage import NormalizedCharacterImage
from Character import Character
from Classifier import Classifier

if len(argv) < 3:
    print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
    exit(1)

neighbours = int(argv[1])
blur_scale = float(argv[2])
suffix = '_%s_%s' % (blur_scale, neighbours)

chars_file = 'characters%s.dat' % suffix
learning_set_file = 'learning_set%s.dat' % suffix
test_set_file = 'test_set%s.dat' % suffix
classifier_file = 'classifier%s.dat' % suffix
results_file = 'results%s.txt' % suffix


# Load characters
if exists(chars_file):
    print 'Loading characters...'
    chars = load(open(chars_file, 'r'))
else:
    print 'Going to generate character objects...'
    chars = []

    for char in sorted(listdir('../images/LearningSet')):
        for image in sorted(listdir('../images/LearningSet/' + char)):
            f = '../images/LearningSet/' + char + '/' + image
            image = GrayscaleImage(f)
            norm = NormalizedCharacterImage(image, blur=blur_scale, height=42)
            #imshow(norm.data, cmap='gray'); show()
            character = Character(char, [], norm)
            character.get_single_cell_feature_vector(neighbours)
            chars.append(character)
            print char

    print 'Saving characters...'
    dump(chars, open(chars_file, 'w+'))


# Load learning set and test set
if exists(learning_set_file):
    print 'Loading learning set...'
    learning_set = load(open(learning_set_file, 'r'))
    print 'Learning set:', [c.value for c in learning_set]
    print 'Loading test set...'
    test_set = load(open(test_set_file, 'r'))
    print 'Test set:', [c.value for c in test_set]
else:
    print 'Going to generate learning set and test set...'
    learning_set = []
    test_set = []
    learned = []

    for char in chars:
        if learned.count(char.value) == 70:
            test_set.append(char)
        else:
            learning_set.append(char)
            learned.append(char.value)

    print 'Learning set:', [c.value for c in learning_set]
    print '\nTest set:', [c.value for c in test_set]
    print '\nSaving learning set...'
    dump(learning_set, file(learning_set_file, 'w+'))
    print 'Saving test set...'
    dump(test_set, file(test_set_file, 'w+'))


# Perform a grid-search to find the optimal values for C and gamma
C = [float(2 ** p) for p in xrange(-5, 16, 2)]
Y = [float(2 ** p) for p in xrange(-15, 4, 2)]

results = []
best = (0,)
i = 0

for c in C:
    for y in Y:
        classifier = Classifier(c=c, gamma=y, neighbours=neighbours)
        classifier.train(learning_set)
        result = classifier.test(test_set)

        if result > best[0]:
            best = (result, c, y, classifier)

        results.append(result)
        i += 1
        print '%d of %d, c = %f, gamma = %f, result = %d%%' \
              % (i, len(C) * len(Y), c, y, int(round(result * 100)))

i = 0
s = '     c\y'

for y in Y:
    s += '| %f' % y

s += '\n'

for c in C:
    s += ' %7s' % c

    for y in Y:
        s +=  '| %8d' % int(round(results[i] * 100))
        i += 1

    s += '\n'

s += '\nBest result: %.3f%% for C = %f and gamma = %f' % best[:3]

print 'Saving results...'
f = open(results_file, 'w+')
f.write(s + '\n')
f.close()

print 'Saving best classifier...'
best[3].save(classifier_file)

print '\n' + s