Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
U
uva
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Taddeüs Kroes
uva
Commits
d287e7ed
Commit
d287e7ed
authored
May 29, 2011
by
Sander Mathijs van Veen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
StatRed: Added remaining comments to all assignments.
parent
9c2c1adc
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
31 additions
and
35 deletions
+31
-35
statred/ass2/image.py
statred/ass2/image.py
+0
-14
statred/ass2/pca.py
statred/ass2/pca.py
+15
-10
statred/ass2/trui.png
statred/ass2/trui.png
+0
-0
statred/ass3/classifiers.py
statred/ass3/classifiers.py
+13
-8
statred/ass4/k-means.py
statred/ass4/k-means.py
+3
-3
No files found.
statred/ass2/image.py
deleted
100644 → 0
View file @
9c2c1adc
from pylab import imread, figure, subplot, imshow, savefig

# Load the test image and display it side by side with a detail crop,
# then save the combined figure as a tightly-cropped PDF.
a = imread('trui.png')

figure(1)

# Left panel: the full image.
subplot(1, 2, 1)
imshow(a)

# Right panel: a 26x26-pixel detail taken from rows/cols 100..125.
d = a[100:126, 100:126]
subplot(1, 2, 2)
imshow(d)

savefig('trui_with_details.pdf', bbox_inches='tight')

# Report the shape of the detail crop (single expression, so this prints
# identically under Python 2's print statement and Python 3's function).
print(d.shape)
statred/ass2/pca.py
View file @
d287e7ed
...
...
@@ -6,7 +6,8 @@ def sortedeig(M):
si
=
argsort
(
d
)[
-
1
::
-
1
]
return
(
d
[
si
],
U
[:,
si
])
def
calc_PCA
(
**
kwargs
):
def
calc_sortedeig
(
**
kwargs
):
"""Calculate the sorted eigenvalues and eigenvectors of the data set."""
if
kwargs
[
'data'
]
==
'natural'
:
X
=
loadtxt
(
'natural400_700_5.asc'
).
T
N
=
219
...
...
@@ -21,13 +22,15 @@ def calc_PCA(**kwargs):
return
sortedeig
(
S
)
def PCA(**kwargs):
    """Show scree diagram of a data set."""
    # Eigenvalues arrive sorted in descending order, so plotting them
    # directly yields the scree diagram.
    d, U = calc_sortedeig(**kwargs)
    figure(1)
    plot(d)
    show()
def
EigenImages
(
k
,
**
kwargs
):
d
,
U
=
calc_PCA
(
**
kwargs
)
"""Plot the first k eigenvectors of the data set."""
d
,
U
=
calc_sortedeig
(
**
kwargs
)
if
kwargs
[
'data'
]
==
'natural'
:
min
,
max
,
step
=
400
,
701
,
5
elif
kwargs
[
'data'
]
==
'munsell'
:
...
...
@@ -40,7 +43,8 @@ def EigenImages(k, **kwargs):
show
()
def
Reconstruct
(
k
,
sample
,
**
kwargs
):
d
,
U
=
calc_PCA
(
**
kwargs
)
"""Reconstruct the original spectrum from the k principle components."""
d
,
U
=
calc_sortedeig
(
**
kwargs
)
if
kwargs
[
'data'
]
==
'natural'
:
X
=
loadtxt
(
'natural400_700_5.asc'
).
T
min
,
max
,
step
=
400
,
701
,
5
...
...
@@ -49,9 +53,9 @@ def Reconstruct(k, sample, **kwargs):
min
,
max
,
step
=
380
,
801
,
1
# Select the specified vector, subtract the mean from it and multiply with
# the transposed eigenvector basis to get the coordinates with respect to U
# the transposed eigenvector basis to get the coordinates with respect to U.
# Then, take the first k components and try to reconstruct the original
# spectrum
# spectrum.
x
=
X
[:,
sample
]
xbar
=
mean
(
X
,
1
)
yzm
=
dot
(
U
.
T
,
x
-
xbar
)[:
k
]
...
...
@@ -64,9 +68,10 @@ def Reconstruct(k, sample, **kwargs):
legend
()
show
()
#PCA(data='natural')
#PCA(data='munsell')
if
__name__
==
'__main__'
:
PCA
(
data
=
'natural'
)
PCA
(
data
=
'munsell'
)
# EigenImages(5, data='natural')
EigenImages
(
5
,
data
=
'natural'
)
Reconstruct
(
5
,
23
,
data
=
'natural'
)
Reconstruct
(
5
,
23
,
data
=
'natural'
)
statred/ass2/trui.png
deleted
100644 → 0
View file @
9c2c1adc
50.1 KB
statred/ass3/classifiers.py
View file @
d287e7ed
...
...
@@ -3,31 +3,30 @@ from pylab import argmin, argmax, tile, unique, argwhere, array, mean, \
from
svm
import
svm_model
,
svm_problem
,
svm_parameter
,
LINEAR
class
NNb
:
"""Nearest neighbour classifier."""
def
__init__
(
self
,
X
,
c
):
self
.
n
,
self
.
N
=
X
.
shape
self
.
X
,
self
.
c
=
X
,
c
def
classify
(
self
,
x
):
d
=
self
.
X
-
tile
(
x
.
reshape
(
self
.
n
,
1
),
self
.
N
)
;
d
=
self
.
X
-
tile
(
x
.
reshape
(
self
.
n
,
1
),
self
.
N
)
dsq
=
sum
(
d
*
d
,
0
)
return
self
.
c
[
argmin
(
dsq
)]
class
kNNb
:
"""k-Nearest neighbour classifier."""
def
__init__
(
self
,
X
,
c
,
k
):
self
.
n
,
self
.
N
=
X
.
shape
self
.
X
,
self
.
c
,
self
.
k
=
X
,
c
,
k
def
classify
(
self
,
x
):
d
=
self
.
X
-
tile
(
x
.
reshape
(
self
.
n
,
1
),
self
.
N
)
;
d
=
self
.
X
-
tile
(
x
.
reshape
(
self
.
n
,
1
),
self
.
N
)
dsq
=
sum
(
d
*
d
,
0
)
minindices
=
dsq
.
argsort
()
# Count class occurrences in k nearest neighbours
hist
=
{}
hist
=
dict
([(
c
,
1
)
for
c
in
self
.
c
[
minindices
[:
self
.
k
]]])
for
c
in
self
.
c
[
minindices
[:
self
.
k
]]:
try
:
hist
[
c
]
+=
1
except
KeyError
:
hist
[
c
]
=
1
# Return the majority class
max_nbb
=
(
0
,
None
)
for
c
,
count
in
hist
.
iteritems
():
...
...
@@ -36,6 +35,7 @@ class kNNb:
return
max_nnb
[
1
]
class
MEC
:
"""Minimum error classifier."""
def
__init__
(
self
,
X
,
c
):
self
.
n
,
self
.
N
=
X
.
shape
self
.
X
,
self
.
c
=
X
,
c
...
...
@@ -53,6 +53,10 @@ class MEC:
mu
=
mean
(
X
,
1
)
Yzm
=
X
-
tile
(
mu
[:,
newaxis
],
X
.
shape
[
1
])
S
=
matrix
(
dot
(
Yzm
,
Yzm
.
T
)
/
(
self
.
n
-
1
))
# Calculate the coefficient needed for the calculation in
# classify(). This is just an optimization, because only the
# covariance matrix is needed for the coefficient, and not the
# vector that is being classified itself.
coeff
=
1
/
(
S
.
A
**-
.
5
*
(
2
*
pi
)
**
(
self
.
n
/
2
))
self
.
class_data
.
append
((
mu
,
S
,
coeff
))
...
...
@@ -64,9 +68,10 @@ class MEC:
return
self
.
classes
[
argmax
([
i
.
sum
()
for
i
in
p
])]
class
SVM
:
"""Support vector machine classifier."""
def
__init__
(
self
,
X
,
c
):
self
.
model
=
svm_model
(
svm_problem
(
c
,
X
.
T
),
pm
,
svm_parameter
(
kernel_type
=
LINEAR
))
svm_parameter
(
kernel_type
=
LINEAR
))
def
classify
(
self
,
x
):
return
self
.
model
.
predict
(
x
)
statred/ass4/k-means.py
View file @
d287e7ed
from
pylab
import
loadtxt
,
array
,
scatter
,
figure
,
show
,
mean
,
argmin
,
append
from
pylab
import
array
,
scatter
,
figure
,
show
,
mean
,
argmin
,
append
from
random
import
random
,
seed
from
sys
import
argv
,
exit
...
...
@@ -47,11 +47,11 @@ if not pp:
initial_means
=
init
k
=
int
(
argv
[
1
])
if
not
1
<=
k
<=
6
:
print
'K must be a value from 1-6'
print 'K must be a value from 1-6 (we only defined six colors).'
exit
()
# Generate dataset, add a multiplication of k so that clusters are formed
n
,
N
=
2
,
100
n
,
N
=
2
,
200
X
=
array
([[
100
*
random
()
+
70
for
j
in
range
(
n
)]
for
i
in
\
range
(
int
(
N
/
k
+
N
%
k
))])
for
c
in
range
(
k
-
1
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment