StatRed: Added remaining comments to all assignments.

parent 9c2c1adc
from pylab import imread, figure, subplot, imshow, savefig

# Show the full image next to a 26x26-pixel detail crop.
a = imread('trui.png')
figure(1)
subplot(1, 2, 1)
imshow(a)
d = a[100:126, 100:126]   # cut the detail region out of the image
subplot(1, 2, 2)
imshow(d)
savefig('trui_with_details.pdf', bbox_inches='tight')
print d.shape
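Since the crop spans rows and columns 100-125, d.shape should print (26, 26) for a greyscale image; for RGB(A) data imread appends an extra colour axis.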
@@ -6,7 +6,8 @@ def sortedeig(M):
     si = argsort(d)[-1::-1]
     return (d[si], U[:,si])
 
-def calc_PCA(**kwargs):
+def calc_sortedeig(**kwargs):
+    """Calculate the sorted eigenvalues and eigenvectors of the data set."""
     if kwargs['data'] == 'natural':
         X = loadtxt('natural400_700_5.asc').T
         N = 219
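For context, a minimal sketch of the helper this hunk renames, assuming the numpy names that pylab re-exports (presumably an eig call sits just above the two context lines shown):

    from numpy import argsort, cov
    from numpy.linalg import eig
    from numpy.random import rand

    def sortedeig(M):
        d, U = eig(M)                 # eigenvalues and eigenvectors of M
        si = argsort(d)[-1::-1]       # indices that sort d, largest first
        return (d[si], U[:, si])

    X = rand(5, 100)                  # toy data: 5 variables, 100 samples
    d, U = sortedeig(cov(X))          # d[0] is now the largest eigenvalue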
@@ -21,13 +22,15 @@ def calc_PCA(**kwargs):
     return sortedeig(S)
 
 def PCA(**kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Show the scree diagram of a data set."""
+    d, U = calc_sortedeig(**kwargs)
     figure(1)
     plot(d)
     show()
 
 def EigenImages(k, **kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Plot the first k eigenvectors of the data set."""
+    d, U = calc_sortedeig(**kwargs)
     if kwargs['data'] == 'natural':
         min, max, step = 400, 701, 5
     elif kwargs['data'] == 'munsell':
@@ -40,7 +43,8 @@ def EigenImages(k, **kwargs):
     show()
 
 def Reconstruct(k, sample, **kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Reconstruct the original spectrum from the k principal components."""
+    d, U = calc_sortedeig(**kwargs)
     if kwargs['data'] == 'natural':
         X = loadtxt('natural400_700_5.asc').T
         min, max, step = 400, 701, 5
@@ -49,9 +53,9 @@ def Reconstruct(k, sample, **kwargs):
         min, max, step = 380, 801, 1
 
     # Select the specified vector, subtract the mean from it and multiply with
-    # the transposed eigenvector basis to get the coordinates with respect to U
+    # the transposed eigenvector basis to get the coordinates with respect to U.
     # Then, take the first k components and try to reconstruct the original
-    # spectrum
+    # spectrum.
     x = X[:,sample]
     xbar = mean(X, 1)
     yzm = dot(U.T, x - xbar)[:k]
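The back-projection that presumably follows these lines (a sketch; x_rec is a name introduced here) multiplies the first k coordinates by the corresponding eigenvectors and re-adds the mean:

    x_rec = dot(U[:, :k], yzm) + xbar   # rank-k reconstruction of the sample
    # with k equal to the full dimension this reproduces x up to rounding error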
@@ -64,9 +68,10 @@ def Reconstruct(k, sample, **kwargs):
     legend()
     show()
 
-#PCA(data='natural')
-#PCA(data='munsell')
+if __name__ == '__main__':
+    PCA(data='natural')
+    PCA(data='munsell')
-#EigenImages(5, data='natural')
+    EigenImages(5, data='natural')
-Reconstruct(5, 23, data='natural')
+    Reconstruct(5, 23, data='natural')
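With the new __main__ guard the demo calls only run when the script is executed directly, so the functions can be imported elsewhere without side effects; a hypothetical use (module name assumed):

    from pca import Reconstruct          # hypothetical module name for this file
    Reconstruct(10, 23, data='natural')  # 10 components, sample 23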
@@ -3,31 +3,30 @@ from pylab import argmin, argmax, tile, unique, argwhere, array, mean, \
 from svm import svm_model, svm_problem, svm_parameter, LINEAR
 
 class NNb:
+    """Nearest neighbour classifier."""
     def __init__(self, X, c):
         self.n, self.N = X.shape
         self.X, self.c = X, c
 
     def classify(self, x):
-        d = self.X - tile(x.reshape(self.n, 1), self.N);
+        d = self.X - tile(x.reshape(self.n, 1), self.N)
         dsq = sum(d*d, 0)
         return self.c[argmin(dsq)]
 
 class kNNb:
+    """k-Nearest neighbour classifier."""
     def __init__(self, X, c, k):
         self.n, self.N = X.shape
         self.X, self.c, self.k = X, c, k
 
     def classify(self, x):
-        d = self.X - tile(x.reshape(self.n, 1), self.N);
+        d = self.X - tile(x.reshape(self.n, 1), self.N)
         dsq = sum(d*d, 0)
         minindices = dsq.argsort()
         # Count class occurrences in k nearest neighbours
-        hist = {}
+        hist = dict([(c, 0) for c in self.c[minindices[:self.k]]])
         for c in self.c[minindices[:self.k]]:
-            try:
-                hist[c] += 1
-            except KeyError:
-                hist[c] = 1
+            hist[c] += 1
         # Return the majority class
         max_nnb = (0, None)
         for c, count in hist.iteritems():
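To illustrate the vote in kNNb.classify with hypothetical labels (the comparison loop is cut off by the hunk below, but its shape follows from max_nnb and the return statement):

    votes = ['a', 'b', 'a', 'a', 'c']     # labels of the k nearest neighbours
    hist = {}
    for c in votes:
        hist[c] = hist.get(c, 0) + 1      # hist == {'a': 3, 'b': 1, 'c': 1}
    max_nnb = (0, None)
    for c, count in hist.items():
        if count > max_nnb[0]:            # keep the class seen most often
            max_nnb = (count, c)
    print(max_nnb[1])                     # prints: a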
@@ -36,6 +35,7 @@ class kNNb:
         return max_nnb[1]
 
 class MEC:
+    """Minimum error classifier."""
     def __init__(self, X, c):
         self.n, self.N = X.shape
         self.X, self.c = X, c
@@ -53,6 +53,10 @@ class MEC:
             mu = mean(X, 1)
             Yzm = X - tile(mu[:,newaxis], X.shape[1])
             S = matrix(dot(Yzm, Yzm.T) / (self.n - 1))
+            # Calculate the coefficient needed for the calculation in
+            # classify(). This is just an optimization, because only the
+            # covariance matrix is needed for the coefficient, and not the
+            # vector that is being classified itself.
             coeff = 1 / (S.A**-.5 * (2 * pi)**(self.n / 2))
             self.class_data.append((mu, S, coeff))
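For reference, the cached factor is the normalization constant of the multivariate normal density; a textbook sketch (using numpy.linalg, whereas the code above applies an element-wise variant of the same constant):

    from numpy import pi, dot, exp
    from numpy.linalg import det, inv

    def normal_density(x, mu, S):
        n = len(mu)
        coeff = 1.0 / ((2 * pi) ** (n / 2.0) * det(S) ** 0.5)  # independent of x
        diff = x - mu
        return coeff * exp(-0.5 * dot(diff, dot(inv(S), diff)))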
@@ -64,9 +68,10 @@ class MEC:
         return self.classes[argmax([i.sum() for i in p])]
 
 class SVM:
+    """Support vector machine classifier."""
     def __init__(self, X, c):
         self.model = svm_model(svm_problem(c, X.T),
-                               pm,svm_parameter(kernel_type=LINEAR))
+                               svm_parameter(kernel_type=LINEAR))
 
     def classify(self, x):
         return self.model.predict(x)
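All four classifiers share the same two-step interface, so any of them can be swapped in; a hypothetical round trip with an n x N training matrix X and label vector c:

    model = kNNb(X, c, 3)              # or NNb(X, c), MEC(X, c), SVM(X, c)
    label = model.classify(X[:, 0])    # classify the first training sample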
-from pylab import loadtxt, array, scatter, figure, show, mean, argmin, append
+from pylab import array, scatter, figure, show, mean, argmin, append
 from random import random, seed
 from sys import argv, exit
@@ -47,11 +47,11 @@ if not pp:
     initial_means = init
 
 k = int(argv[1])
 if not 1 <= k <= 6:
-    print 'K must be a value from 1-6'
+    print 'K must be a value from 1-6 (we only defined six colors).'
     exit()
 
 # Generate dataset, add a multiple of k so that clusters are formed
-n, N = 2, 100
+n, N = 2, 200
 X = array([[100 * random() + 70 for j in range(n)] for i in \
            range(int(N / k + N % k))])
 for c in range(k - 1):
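A hedged sketch of how the loop over the remaining k - 1 clusters might continue (its body is truncated above; the 100 * (c + 1) offset is an assumption consistent with the "multiple of k" comment):

    for c in range(k - 1):
        block = array([[100 * random() + 70 + 100 * (c + 1) for j in range(n)]
                       for i in range(int(N / k))])
        X = append(X, block, axis=0)   # stack the offset cluster onto X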