StatRed: Added remaining comments to all assignments.

parent 9c2c1adc
from pylab import imread, figure, subplot, imshow, savefig

# Show the image next to a 26x26 pixel detail cut out of it.
a = imread('trui.png')
figure(1)
subplot(1, 2, 1)
imshow(a)
d = a[100:126, 100:126]
subplot(1, 2, 2)
imshow(d)
savefig('trui_with_details.pdf', bbox_inches='tight')
print d.shape
@@ -6,7 +6,8 @@ def sortedeig(M):
     si = argsort(d)[-1::-1]
     return (d[si], U[:,si])

-def calc_PCA(**kwargs):
+def calc_sortedeig(**kwargs):
+    """Calculate the sorted eigenvalues and eigenvectors of the data set."""
     if kwargs['data'] == 'natural':
         X = loadtxt('natural400_700_5.asc').T
         N = 219
@@ -21,13 +22,15 @@ def calc_PCA(**kwargs):
     return sortedeig(S)

 def PCA(**kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Show a scree diagram of a data set."""
+    d, U = calc_sortedeig(**kwargs)
     figure(1)
     plot(d)
     show()

 def EigenImages(k, **kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Plot the first k eigenvectors of the data set."""
+    d, U = calc_sortedeig(**kwargs)
     if kwargs['data'] == 'natural':
         min, max, step = 400, 701, 5
     elif kwargs['data'] == 'munsell':
@@ -40,7 +43,8 @@ def EigenImages(k, **kwargs):
     show()

 def Reconstruct(k, sample, **kwargs):
-    d, U = calc_PCA(**kwargs)
+    """Reconstruct the original spectrum from the first k principal components."""
+    d, U = calc_sortedeig(**kwargs)
     if kwargs['data'] == 'natural':
         X = loadtxt('natural400_700_5.asc').T
         min, max, step = 400, 701, 5
@@ -49,9 +53,9 @@ def Reconstruct(k, sample, **kwargs):
         min, max, step = 380, 801, 1
     # Select the specified vector, subtract the mean from it and multiply with
-    # the transposed eigenvector basis to get the coordinates with respect to U
+    # the transposed eigenvector basis to get the coordinates with respect to U.
     # Then, take the first k components and try to reconstruct the original
-    # spectrum
+    # spectrum.
     x = X[:,sample]
     xbar = mean(X, 1)
     yzm = dot(U.T, x - xbar)[:k]
@@ -64,9 +68,10 @@ def Reconstruct(k, sample, **kwargs):
     legend()
     show()

-#PCA(data='natural')
-#PCA(data='munsell')
-#EigenImages(5, data='natural')
-Reconstruct(5, 23, data='natural')
+if __name__ == '__main__':
+    PCA(data='natural')
+    PCA(data='munsell')
+    EigenImages(5, data='natural')
+    Reconstruct(5, 23, data='natural')
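For reference, the projection in Reconstruct() is ordinary PCA: centre the sample, take its coordinates in the sorted eigenvector basis, keep the first k, and map back. A minimal self-contained sketch of the same steps (a random matrix stands in for the spectra, since the .asc data files are assumed absent here):

    from numpy import dot, mean, tile
    from numpy.linalg import eigh
    from numpy.random import rand

    X = rand(61, 219)                      # stand-in data: wavelengths x samples
    xbar = mean(X, 1)
    Yzm = X - tile(xbar[:, None], X.shape[1])
    d, U = eigh(dot(Yzm, Yzm.T) / (X.shape[1] - 1))
    si = d.argsort()[::-1]                 # same ordering as sortedeig()
    d, U = d[si], U[:, si]

    k, sample = 5, 23
    x = X[:, sample]
    y = dot(U.T, x - xbar)[:k]             # coordinates w.r.t. the first k eigenvectors
    xhat = xbar + dot(U[:, :k], y)         # reconstructed spectrum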
@@ -3,31 +3,30 @@ from pylab import argmin, argmax, tile, unique, argwhere, array, mean, \
 from svm import svm_model, svm_problem, svm_parameter, LINEAR

 class NNb:
+    """Nearest neighbour classifier."""
     def __init__(self, X, c):
         self.n, self.N = X.shape
         self.X, self.c = X, c

     def classify(self, x):
-        d = self.X - tile(x.reshape(self.n, 1), self.N);
+        d = self.X - tile(x.reshape(self.n, 1), self.N)
         dsq = sum(d*d, 0)
         return self.c[argmin(dsq)]

 class kNNb:
+    """k-Nearest neighbour classifier."""
     def __init__(self, X, c, k):
         self.n, self.N = X.shape
         self.X, self.c, self.k = X, c, k

     def classify(self, x):
-        d = self.X - tile(x.reshape(self.n, 1), self.N);
+        d = self.X - tile(x.reshape(self.n, 1), self.N)
         dsq = sum(d*d, 0)
         minindices = dsq.argsort()
         # Count class occurrences in k nearest neighbours
-        hist = {}
+        hist = dict([(c, 0) for c in self.c[minindices[:self.k]]])
         for c in self.c[minindices[:self.k]]:
-            try:
-                hist[c] += 1
-            except KeyError:
-                hist[c] = 1
+            hist[c] += 1
         # Return the majority class
         max_nnb = (0, None)
         for c, count in hist.iteritems():
@@ -36,6 +35,7 @@ class kNNb:
         return max_nnb[1]
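The histogram loop in kNNb.classify() is a plain majority vote over the k nearest labels. For comparison, the same vote can be written with collections.Counter (a sketch, not the assignment's code):

    from collections import Counter

    def majority_vote(labels):
        # labels: the class labels of the k nearest neighbours
        (winner, count), = Counter(labels).most_common(1)
        return winner

    print majority_vote([1, 2, 2, 3, 2])    # prints 2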
 class MEC:
+    """Minimum error classifier."""
     def __init__(self, X, c):
         self.n, self.N = X.shape
         self.X, self.c = X, c
@@ -53,6 +53,10 @@ class MEC:
             mu = mean(X, 1)
             Yzm = X - tile(mu[:,newaxis], X.shape[1])
             S = matrix(dot(Yzm, Yzm.T) / (self.n - 1))
+            # Precompute the coefficient needed in classify(). This is just
+            # an optimization: the coefficient depends only on the covariance
+            # matrix, not on the vector that is being classified.
             coeff = 1 / (S.A**-.5 * (2 * pi)**(self.n / 2))
             self.class_data.append((mu, S, coeff))
@@ -64,9 +68,10 @@ class MEC:
         return self.classes[argmax([i.sum() for i in p])]
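The coeff line above precomputes the part of a Gaussian density that does not depend on the sample, so classify() only has to evaluate the exponential per vector. For reference, the full multivariate normal density being modelled is exp(-(x - mu)' S^-1 (x - mu) / 2) / ((2*pi)**(n/2) * det(S)**0.5); a straightforward sketch of that formula (an illustration, not the elided classify() code):

    from numpy import dot, exp, pi
    from numpy.linalg import det, inv

    def mvn_density(x, mu, S):
        # Full multivariate normal density for an n-dimensional sample x.
        n = len(mu)
        coeff = 1.0 / ((2 * pi) ** (n / 2.0) * det(S) ** 0.5)
        diff = x - mu
        return coeff * exp(-0.5 * dot(diff, dot(inv(S), diff)))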
 class SVM:
+    """Support vector machine classifier."""
     def __init__(self, X, c):
         self.model = svm_model(svm_problem(c, X.T),
-                               pm,svm_parameter(kernel_type=LINEAR))
+                               svm_parameter(kernel_type=LINEAR))

     def classify(self, x):
         return self.model.predict(x)
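Usage of the wrapper mirrors the other classifiers: construct it with the training matrix X (features by samples) and the label vector c, then classify single samples. A hypothetical session, with X, c and x assumed to come from the rest of the assignment:

    clf = SVM(X, c)          # trains a linear-kernel model via libsvm
    print clf.classify(x)    # predicted class label for one sample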
-from pylab import loadtxt, array, scatter, figure, show, mean, argmin, append
+from pylab import array, scatter, figure, show, mean, argmin, append
 from random import random, seed
 from sys import argv, exit
@@ -47,11 +47,11 @@ if not pp:
     initial_means = init

 k = int(argv[1])
 if not 1 <= k <= 6:
-    print 'K must be a value from 1-6'
+    print 'K must be a value from 1-6 (we only defined six colors).'
     exit()

 # Generate dataset, add a multiplication of k so that clusters are formed
-n, N = 2, 100
+n, N = 2, 200
 X = array([[100 * random() + 70 for j in range(n)] for i in \
         range(int(N / k + N % k))])
 for c in range(k - 1):
...
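The generator above draws the first group of points uniformly from [70, 170)^2; the truncated loop presumably appends k - 1 further groups shifted by a multiple of the group index, so that k separated clusters appear. A sketch of that idea in numpy (the exact offsets are an assumption, since the loop body is cut off above):

    from numpy import vstack
    from numpy.random import rand

    def make_clusters(k, N, n=2):
        # One group per cluster; each later group is shifted by 100 * (c + 1)
        # (assumed offsets, the original loop body is elided).
        groups = [100 * rand(int(N / k + N % k), n) + 70]
        for c in range(k - 1):
            groups.append(100 * rand(int(N / k), n) + 70 + 100 * (c + 1))
        return vstack(groups)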