#!/usr/bin/python
from os import listdir
from os.path import exists
from cPickle import load, dump
from sys import argv, exit
from GrayscaleImage import GrayscaleImage
from NormalizedCharacterImage import NormalizedCharacterImage
from Character import Character
from Classifier import Classifier
- if len(argv) < 3:
- print 'Usage: python %s NEIGHBOURS BLUR_SCALE' % argv[0]
- exit(1)
- neighbours = int(argv[1])
- blur_scale = float(argv[2])
- suffix = '_%s_%s' % (blur_scale, neighbours)
- chars_file = 'characters%s.dat' % suffix
- learning_set_file = 'learning_set%s.dat' % suffix
- test_set_file = 'test_set%s.dat' % suffix
- classifier_file = 'classifier%s.dat' % suffix
- results_file = 'results%s.txt' % suffix
- # Load characters
- if exists(chars_file):
- print 'Loading characters...'
- chars = load(open(chars_file, 'r'))
- else:
- print 'Going to generate character objects...'
- chars = []
- for char in sorted(listdir('../images/LearningSet')):
- for image in sorted(listdir('../images/LearningSet/' + char)):
- f = '../images/LearningSet/' + char + '/' + image
- image = GrayscaleImage(f)
- norm = NormalizedCharacterImage(image, blur=blur_scale, height=42)
- #imshow(norm.data, cmap='gray'); show()
- character = Character(char, [], norm)
- character.get_single_cell_feature_vector(neighbours)
- chars.append(character)
- print char
- print 'Saving characters...'
- dump(chars, open(chars_file, 'w+'))
- # Load learning set and test set
- if exists(learning_set_file):
- print 'Loading learning set...'
- learning_set = load(open(learning_set_file, 'r'))
- print 'Learning set:', [c.value for c in learning_set]
- print 'Loading test set...'
- test_set = load(open(test_set_file, 'r'))
- print 'Test set:', [c.value for c in test_set]
- else:
- print 'Going to generate learning set and test set...'
- learning_set = []
- test_set = []
- learned = []
- for char in chars:
- if learned.count(char.value) == 70:
- test_set.append(char)
- else:
- learning_set.append(char)
- learned.append(char.value)
- print 'Learning set:', [c.value for c in learning_set]
- print '\nTest set:', [c.value for c in test_set]
- print '\nSaving learning set...'
- dump(learning_set, file(learning_set_file, 'w+'))
- print 'Saving test set...'
- dump(test_set, file(test_set_file, 'w+'))
- # Perform a grid-search to find the optimal values for C and gamma
- C = [float(2 ** p) for p in xrange(-5, 16, 2)]
- Y = [float(2 ** p) for p in xrange(-15, 4, 2)]
- results = []
- best = (0,)
- i = 0
- for c in C:
- for y in Y:
- classifier = Classifier(c=c, gamma=y, neighbours=neighbours)
- classifier.train(learning_set)
- result = classifier.test(test_set)
- if result > best[0]:
- best = (result, c, y, classifier)
- results.append(result)
- i += 1
- print '%d of %d, c = %f, gamma = %f, result = %d%%' \
- % (i, len(C) * len(Y), c, y, int(round(result * 100)))
- i = 0
- s = ' c\y'
- for y in Y:
- s += '| %f' % y
- s += '\n'
- for c in C:
- s += ' %7s' % c
- for y in Y:
- s += '| %8d' % int(round(results[i] * 100))
- i += 1
- s += '\n'
- s += '\nBest result: %.3f%% for C = %f and gamma = %f' % best[:3]
- print 'Saving results...'
- f = open(results_file, 'w+')
- f.write(s + '\n')
- f.close()
- print 'Saving best classifier...'
- best[3].save(classifier_file)
- print '\n' + s