StatRed: Added comments to code.

parent da8076a2
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -O0 CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -ggdb -O0
LDFLAGS=-lm LDFLAGS=-lm
PROGS=test main PROGS=test main
......
from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \ from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \
plot, subplot, axis, figure, clf, savefig plot, subplot, axis, figure, clf, savefig
# The mean vector (mu) and covariance matrix (cov) used to generate the dataset.
mu = array([[3], mu = array([[3],
[4], [4],
[5], [5],
[6]]) [6]])
cov = array( cov = array(
[[ 3.01602775, 1.02746769, -3.60224613, -2.08792829], [[ 3.01602775, 1.02746769, -3.60224613, -2.08792829],
...@@ -12,10 +13,14 @@ cov = array( ...@@ -12,10 +13,14 @@ cov = array(
[-3.60224613, -3.98616664, 13.04508284, -1.59255406], [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
[-2.08792829, 0.48723704, -1.59255406, 8.28742469]]) [-2.08792829, 0.48723704, -1.59255406, 8.28742469]])
# `samples` is the constant `N': the number of sample vectors to generate
# according to the normal distribution.
samples = 1000 samples = 1000
vector_size = 4 vector_size = 4
def dataset(): def dataset():
# The covariance matrix is used to transform the generated dataset into a
# multivariate normal distribution dataset.
d, U = eig(cov) d, U = eig(cov)
L = diagflat(d) L = diagflat(d)
A = dot(U, sqrt(L)) A = dot(U, sqrt(L))
...@@ -23,11 +28,13 @@ def dataset(): ...@@ -23,11 +28,13 @@ def dataset():
return dot(A,X) + tile(mu, samples) return dot(A,X) + tile(mu, samples)
if __name__ == '__main__': if __name__ == '__main__':
# Create an n*n grid of subplots and generate a new dataset.
figure(vector_size**2) figure(vector_size**2)
clf() clf()
Y = dataset() Y = dataset()
for i in range(vector_size): for i in range(vector_size):
for j in range(vector_size): for j in range(vector_size):
# Skip the diagonal subplots since those are irrelevant.
if i != j: if i != j:
subplot(vector_size, vector_size, (i+1) + j*vector_size) subplot(vector_size, vector_size, (i+1) + j*vector_size)
plot(Y[i], Y[j], 'x') plot(Y[i], Y[j], 'x')
......
from q21_multivariate import dataset from q21_multivariate import dataset
from numpy import array, mean, tile, newaxis, dot from numpy import array, mean, tile, newaxis, dot
from pylab import eigvals, diagflat, axis, figure, clf, show, plot, subplot from pylab import eigvals, axis, figure, clf, show, plot
def eigenvalues(n): def eigenvalues(n):
Y = array([mean(dataset(), 1) for i in range(n)]).T Y = array([mean(dataset(), 1) for i in range(n)]).T
......
from numpy import loadtxt from pylab import loadtxt, figure, plot, subplot, axis, clf, savefig
from pylab import figure, plot, subplot, show, axis, clf
def cnvt(s): # The last column of the data sets is a label, which is used to distinguish the
try: # three groups of data in the data sets. This label should be translated to a
return {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, \ # floating-point value, or a conversion error will occur (since ``dtype=float'').
'Iris-virginica': 2.0}[s] cnvt_dict = {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, 'Iris-virginica': 2.0}
except KeyError: data = loadtxt('iris.data', delimiter=',', dtype=float, \
ireturn -1.0 converters={4: lambda s: not s in cnvt_dict and -1.0 or cnvt_dict[s]})
data = loadtxt('iris.data', delimiter=',', dtype=float, converters={4: cnvt}) # Transform the data set into a list of (x, y) points per subplot and per label.
graph_data = [[[] for i in range(3)] for j in range(16)] graph_data = [[[] for i in range(3)] for j in range(16)]
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4): for i in range(4):
for j in range(4): for j in range(4):
if i != j: if i != j:
for d in data: for d in data:
graph_data[i + j*4][int(d[4])].append((d[i], d[j])); graph_data[i + j*4][int(d[4])].append((d[i], d[j]));
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4): for i in range(4):
for j in range(4): for j in range(4):
if i != j: if i != j:
subplot(4, 4, (i+1) + j*4) subplot(4, 4, (i+1) + j*4)
axis('equal') axis('equal')
# Plot the three data sets.
for c in range(3): for c in range(3):
tmp = zip(*graph_data[i + j*4][c]) tmp = zip(*graph_data[i + j*4][c])
plot(tmp[0], tmp[1], 'x' + colors[c]) plot(tmp[0], tmp[1], 'x' + colors[c])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment