StatRed: Added comments to code.

96dd3447 · Sander Mathijs van Veen · da8076a2 · 96dd3447 · 96dd3447 · 96dd3447
Commit 96dd3447 authored Apr 12, 2011 by Sander Mathijs van Veen
4 changed files
--- a/modsim/ass3/Makefile
+++ b/modsim/ass3/Makefile
-CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -O0
+CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -ggdb -O0
 LDFLAGS=-lm

 PROGS=test main

--- a/statred/ass1/q21_multivariate.py
+++ b/statred/ass1/q21_multivariate.py
 from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \
        plot, subplot, axis, figure, clf, savefig

+# The mean vector (mu) and covariance matrix (cov) used to generate the dataset.
 mu = array([[3],
            [4],
            [5],
@@ -12,10 +13,14 @@ cov  = array(
     [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
     [-2.08792829,  0.48723704, -1.59255406,  8.28742469]])

+# `samples' is the constant N: the total number of samples to generate
+# according to the normal distribution.
 samples = 1000
 vector_size = 4

 def dataset():
+    # The covariance matrix is used to transform the generated dataset into a
+    # multivariate normal distribution dataset.
    d, U = eig(cov)
    L = diagflat(d)
    A = dot(U, sqrt(L))
@@ -23,11 +28,13 @@ def dataset():
    return dot(A,X) + tile(mu, samples)

 if __name__ == '__main__':
+    # Create an n*n grid of subplots and generate a new dataset.
    figure(vector_size**2)
    clf()
    Y = dataset()
    for i in range(vector_size):
        for j in range(vector_size):
+            # Skip the diagonal subplots since those are irrelevant.
            if i != j:
                subplot(vector_size, vector_size, (i+1) + j*vector_size)
                plot(Y[i], Y[j], 'x')

--- a/statred/ass1/q22_estimate.py
+++ b/statred/ass1/q22_estimate.py
 from q21_multivariate import dataset
 from numpy import array, mean, tile, newaxis, dot
-from pylab import eigvals, diagflat, axis, figure, clf, show, plot, subplot
+from pylab import eigvals, axis, figure, clf, show, plot

 def eigenvalues(n):
    Y = array([mean(dataset(), 1) for i in range(n)]).T

--- a/statred/ass1/q23_iris.py
+++ b/statred/ass1/q23_iris.py
-from numpy import loadtxt
-from pylab import figure, plot, subplot, show, axis, clf
+from pylab import loadtxt, figure, plot, subplot, axis, clf, savefig

-def cnvt(s):
-    try:
-        return {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, \
-                'Iris-virginica': 2.0}[s]
-    except KeyError:
-        ireturn -1.0
+# The last column of the data set is a label, which distinguishes the three
+# groups of data. This label must be translated to a floating-point number, or
+# a conversion error will occur (since ``dtype=float'').
+cnvt_dict = {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, 'Iris-virginica': 2.0}
+data = loadtxt('iris.data', delimiter=',', dtype=float, \
+        converters={4: lambda s: not s in cnvt_dict and -1.0 or cnvt_dict[s]})

-data = loadtxt('iris.data', delimiter=',', dtype=float, converters={4: cnvt})
+# Transform the data set into a list of (x, y) points per subplot and label.
 graph_data = [[[] for i in range(3)] for j in range(16)]
-colors = ['r', 'g', 'b']
-figure(16)
-clf()
 for i in range(4):
    for j in range(4):
        if i != j:
            for d in data:
                graph_data[i + j*4][int(d[4])].append((d[i], d[j]));
+
+colors = ['r', 'g', 'b']
+figure(16)
+clf()
+
 for i in range(4):
    for j in range(4):
        if i != j:
            subplot(4, 4, (i+1) + j*4)
            axis('equal')
+            # Plot the three data sets.
            for c in range(3):
                tmp = zip(*graph_data[i + j*4][c])
                plot(tmp[0], tmp[1], 'x' + colors[c])