Commit e2507c65 authored by Taddeüs Kroes's avatar Taddeüs Kroes

Implemented SVM classifier and added test file to test it.

parent cce4f7e3
......@@ -7,3 +7,14 @@
*.synctex.gz
*.toc
*.out
*.jpg
images/BBB
images/Images
images/Infos
images/licenseplates
chars
learning_set
test_set
classifier
classifier-model
classifier-characters
# TODO cleanup the getElements stuff
from LocalBinaryPatternizer import LocalBinaryPatternizer
class Character:
def __init__(self, value, corners, image):
self.value = value
self.corners = corners
self.image = image
# Testing purposes
def show(self):
from pylab import imshow, show
......@@ -12,4 +13,6 @@ class Character:
show()
def get_feature_vector(self):
    """Return the feature vector that a LocalBinaryPatternizer creates
    for this character's image."""
    return LocalBinaryPatternizer(self.image).create_features_vector()
from svmutil import svm_model, svm_problem, svm_parameter, svm_predict, LINEAR
from cPicle import dump, load
from svmutil import svm_train, svm_problem, svm_parameter, svm_predict, \
LINEAR, svm_save_model, svm_load_model
from cPickle import dump, load
class Classifier:
def __init__(self, c=None, filename=None):
if filename:
# If a filename is given, load a model from the given filename
f = file(filename, 'r')
self.model, self.param, self.character_map = load(f)
self.model = svm_load_model(filename + '-model')
f = file(filename + '-characters', 'r')
self.character_map = load(f)
f.close()
else:
self.param = svm_parameter()
......@@ -18,8 +20,9 @@ class Classifier:
def save(self, filename):
    """Save the trained classifier using the given base filename.

    Two files are written: the SVM model goes to ``filename + '-model'``
    and the pickled character map goes to ``filename + '-characters'``.
    These are the same two files the constructor reads back when it is
    given a filename.
    """
    svm_save_model(filename + '-model', self.model)

    # The context manager closes the file even if pickling raises.
    with open(filename + '-characters', 'w+') as f:
        dump(self.character_map, f)
def train(self, learning_set):
......@@ -27,8 +30,11 @@ class Classifier:
known values."""
classes = []
features = []
l = len(learning_set)
for char in learning_set:
for i, char in enumerate(learning_set):
print 'Training "%s" -- %d of %d (%d%% done)' \
% (char.value, i + 1, l, int(100 * (i + 1) / l))
# Map the character to an integer for use in the SVM model
if char.value not in self.character_map:
self.character_map[char.value] = len(self.character_map)
......@@ -36,15 +42,13 @@ class Classifier:
classes.append(self.character_map[char.value])
features.append(char.get_feature_vector())
problem = svm_problem(self.c, features)
self.model = svm_model(problem, self.param)
# Add prediction function that returns a numeric class prediction
self.model.predict = lambda self, x: svm_predict([0], [x], self)[0][0]
problem = svm_problem(classes, features)
self.model = svm_train(problem, self.param)
def classify(self, character):
"""Classify a character object and assign its value."""
prediction = self.model.predict(character.get_feature_vector())
predict = lambda x: svm_predict([0], [x], self.model)[0][0]
prediction = predict(character.get_feature_vector())
for value, svm_class in self.character_map.iteritems():
if svm_class == prediction:
......
#!/usr/bin/python
from LicensePlate import LicensePlate
from Classifier import Classifier
from cPickle import dump, load
#chars = []
#
#for i in range(9):
# for j in range(100):
# try:
# filename = '%04d/00991_%04d%02d.info' % (i, i, j)
# print 'loading file "%s"' % filename
# plate = LicensePlate(i, j)
#
# if hasattr(plate, 'characters'):
# chars.extend(plate.characters)
# except:
# print 'epic fail'
#
#print 'loaded %d chars' % len(chars)
#
#dump(chars, file('chars', 'w+'))
#----------------------------------------------------------------
#chars = load(file('chars', 'r'))
#learned = []
#learning_set = []
#test_set = []
#
#for char in chars:
# if learned.count(char.value) > 80:
# test_set.append(char)
# else:
# learning_set.append(char)
# learned.append(char.value)
#
#dump(learning_set, file('learning_set', 'w+'))
#dump(test_set, file('test_set', 'w+'))
#----------------------------------------------------------------
# Load the pickled learning set; the context manager closes the file handle
# as soon as unpickling is done.
with open('learning_set', 'r') as f:
    learning_set = load(f)

# Train the classifier with the learning set
classifier = Classifier(c=3)
classifier.train(learning_set)
#classifier.save('classifier')
#----------------------------------------------------------------
#classifier = Classifier(filename='classifier')
#test_set = load(file('test_set', 'r'))
#l = len(test_set)
#matches = 0
#
#for i, char in enumerate(test_set):
# prediction = classifier.classify(char)
#
# if char.value == prediction:
# print ':) ------> Successfully recognized "%s"' % char.value
# matches += 1
# else:
# print ':( Expected character "%s", got "%s"' \
# % (char.value, prediction),
#
# print ' -- %d of %d (%d%% done)' % (i + 1, l, int(100 * (i + 1) / l))
#
#print '\n%d matches (%d%%), %d fails' % (matches, \
# int(100 * matches / len(test_set)), \
# len(test_set) - matches)
from pylab import array, zeros, inv, dot, svd, shape, floor
from xml.dom.minidom import parse
from xml.dom.minidom import parse
from Error import Error
from Point import Point
from Character import Character
from GrayscaleImage import GrayscaleImage
from NormalizedCharacterImage import NormalizedCharacterImage
'''
Creates a license plate object based on an XML file. The image should be
placed in a folder 'images' and the XML file in a folder 'xml'.
TODO: perhaps remove non required XML lookups
TODO: perhaps remove non required XML lookups
'''
class LicensePlate:
def __init__(self, folder_nr, file_nr):
    """Load the license plate identified by a folder and file number.

    Both numbers are formatted into the base name
    '<folder>/00991_<folder><file>'. The matching '.info' file under
    '../images/Infos/' is parsed as XML, the matching '.jpg' under
    '../images/Images/' is loaded as a GrayscaleImage, and width and
    height are taken from the properties in the XML.
    """
    filename = '%04d/00991_%04d%02d' % (folder_nr, folder_nr, file_nr)

    self.dom = parse('../images/Infos/%s.info' % filename)
    properties = self.get_properties()

    self.image = GrayscaleImage('../images/Images/%s.jpg' % filename)
    self.width = int(properties['width'])
    self.height = int(properties['height'])

    # Read the remaining plate data from the XML exactly once.
    self.read_xml()
# sets the entire license plate of an image
def retrieve_data(self, corners):
def retrieve_data(self, corners):
x0, y0 = corners[0].to_tuple()
x1, y1 = corners[1].to_tuple()
x2, y2 = corners[2].to_tuple()
x3, y3 = corners[3].to_tuple()
M = max(x0, x1, x2, x3) - min(x0, x1, x2, x3)
N = max(y0, y1, y2, y3) - min(y0, y1, y2, y3)
matrix = array([
[x0, y0, 1, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, x0, y0, 1, 0, 0, 0],
......@@ -44,10 +43,10 @@ class LicensePlate:
[ 0, 0, 0, x1, y1, 1, 0, 0, 0],
[x2, y2, 1, 0, 0, 0, -M*x2, -M*y2, -M],
[ 0, 0, 0, x2, y2, 1, -N*x2, -N*y2, -N],
[x3, y3, 1, 0, 0, 0, 0, 0, 0],
[x3, y3, 1, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, x3, y3, 1, -N*x3, -N*y3, -N]
])
P = inv(self.get_transformation_matrix(matrix))
data = array([zeros(M, float)] * N)
......@@ -56,23 +55,23 @@ class LicensePlate:
or_coor = dot(P, ([[i],[j],[1]]))
or_coor_h = or_coor[1][0] / or_coor[2][0], or_coor[0][0] / or_coor[2][0]
data[j][i] = self.pV(or_coor_h[0], or_coor_h[1])
return data
def get_transformation_matrix(self, matrix):
    """Return the 3x3 matrix formed from the last right-singular vector
    of `matrix`.

    `matrix` must have 9 columns, so that the singular vector has the 9
    entries needed to fill the 3x3 result row by row.
    """
    # Get the vector p and the values that are in there by taking the SVD.
    # Since D is diagonal with the singular values sorted from large to
    # small, the optimal q in min ||Dq|| is q = [[0]..[1]]. Therefore,
    # p = Vq means p is the last column in V.
    # NOTE(review): assuming pylab's svd is numpy.linalg.svd, the third
    # return value is V already transposed, so that column is its row 8.
    _, _, V = svd(matrix)
    p = V[8]

    return array([
        [p[0], p[1], p[2]],
        [p[3], p[4], p[5]],
        [p[6], p[7], p[8]],
    ])
def pV(self, x, y):
image = self.image
......@@ -85,25 +84,25 @@ class LicensePlate:
y_low = floor(y)
y_high = floor(y + 1)
x_y = (x_high - x_low) * (y_high - y_low)
a = x_high - x
b = y_high - y
c = x - x_low
d = y - y_low
return image[x_low, y_low] / x_y * a * b \
+ image[x_high, y_low] / x_y * c * b \
+ image[x_low , y_high] / x_y * a * d \
+ image[x_high, y_high] / x_y * c * d
return 0
# Testing purposes
def show(self):
    """Display self.data as a grayscale image (for testing purposes)."""
    import pylab

    pylab.imshow(self.data, cmap="gray")
    pylab.show()
def get_properties(self):
children = self.get_children("properties")
......@@ -120,7 +119,7 @@ class LicensePlate:
# TODO : create function for location / characters as they do the same
def read_xml(self):
children = self.get_children("plate") # most recent version
for child in children:
if child.nodeName == "regnum":
self.license_full = child.firstChild.data
......@@ -130,7 +129,7 @@ class LicensePlate:
self.corners = self.get_corners(child)
elif child.nodeName == "characters":
nodes = child.childNodes
self.characters = []
for character in nodes:
......@@ -139,11 +138,11 @@ class LicensePlate:
corners = self.get_corners(character)
data = self.retrieve_data(corners)
image = NormalizedCharacterImage(data=data)
self.characters.append(Character(value, corners, image))
else:
pass
def get_node(self, node, dom=None):
if not dom:
dom = self.dom
......@@ -152,14 +151,14 @@ class LicensePlate:
def get_children(self, node, dom=None):
    """Return the childNodes of whatever self.get_node(node, dom) yields."""
    parent = self.get_node(node, dom)
    return parent.childNodes
def get_corners(self, child):
    """Return a list with one Point per node named "point" among the
    nodes that self.get_children("quadrangle", child) returns."""
    return [
        Point(entry)
        for entry in self.get_children("quadrangle", child)
        if entry.nodeName == "point"
    ]
......@@ -3,7 +3,7 @@ from numpy import zeros, byte
from math import ceil
class LocalBinaryPatternizer:
def __init__(self, image, cell_size=16):
self.cell_size = cell_size
self.image = image
......@@ -23,7 +23,7 @@ class LocalBinaryPatternizer:
at each neighbour starting at 7 in the top-left corner. This gives a
8-bit feature number of a pixel'''
for y, x, value in self.image:
pattern = (self.is_pixel_darker(y - 1, x - 1, value) << 7) \
| (self.is_pixel_darker(y - 1, x , value) << 6) \
| (self.is_pixel_darker(y - 1, x + 1, value) << 5) \
......@@ -32,17 +32,17 @@ class LocalBinaryPatternizer:
| (self.is_pixel_darker(y + 1, x , value) << 2) \
| (self.is_pixel_darker(y + 1, x - 1, value) << 1) \
| (self.is_pixel_darker(y , x - 1, value) << 0)
cy, cx = self.get_cell_index(y, x)
self.features[cy][cx].add(pattern)
return self.get_features_as_array()
def is_pixel_darker(self, y, x, value):
    """Tell whether (y, x) lies inside the image and the pixel stored
    there is greater than `value`.

    For a position outside the image, the falsy result of in_bounds() is
    returned as is and the pixel is never read.
    """
    picture = self.image
    inside = picture.in_bounds(y, x)

    if not inside:
        return inside

    return picture[y, x] > value
def get_cell_index(self, y, x):
    """Return the (row, column) index of the cell containing pixel (y, x).

    Each coordinate is floor-divided by self.cell_size. Floor division is
    spelled out because the result is used to index self.features, so it
    must stay an integer for integer coordinates.
    """
    return (y // self.cell_size, x // self.cell_size)
def get_features_as_array(self):
    """Return the bins of the first histogram found in self.features.

    self.features is walked row by row and the `bins` attribute of every
    entry is collected; only the first collected element is returned.
    Raises IndexError when self.features holds no entries.
    """
    # NOTE(review): only the first cell's bins are returned, the other
    # cells are dropped -- confirm this is intended.
    return [h.bins for sub in self.features for h in sub][0]
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment