StatRed: Added comments to code.

parent da8076a2
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -O0
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -ggdb -O0
LDFLAGS=-lm
PROGS=test main
......
from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \
plot, subplot, axis, figure, clf, savefig
# The used mu (mean vector) and cov (covariance matrix).
mu = array([[3],
[4],
[5],
[6]])
[4],
[5],
[6]])
cov = array(
[[ 3.01602775, 1.02746769, -3.60224613, -2.08792829],
......@@ -12,10 +13,14 @@ cov = array(
[-3.60224613, -3.98616664, 13.04508284, -1.59255406],
[-2.08792829, 0.48723704, -1.59255406, 8.28742469]])
# `samples' is the constant `N': the total number of samples to generate
# according to the normal distribution.
samples = 1000
vector_size = 4
def dataset():
# The covariance matrix is used to transform the generated dataset into a
# multivariate normal distribution dataset.
d, U = eig(cov)
L = diagflat(d)
A = dot(U, sqrt(L))
......@@ -23,11 +28,13 @@ def dataset():
return dot(A,X) + tile(mu, samples)
if __name__ == '__main__':
# Create an n*n grid of subplots and generate a new dataset.
figure(vector_size**2)
clf()
Y = dataset()
for i in range(vector_size):
for j in range(vector_size):
# Skip the diagonal subplots since those are irrelevant.
if i != j:
subplot(vector_size, vector_size, (i+1) + j*vector_size)
plot(Y[i], Y[j], 'x')
......
from q21_multivariate import dataset
from numpy import array, mean, tile, newaxis, dot
from pylab import eigvals, diagflat, axis, figure, clf, show, plot, subplot
from pylab import eigvals, axis, figure, clf, show, plot
def eigenvalues(n):
Y = array([mean(dataset(), 1) for i in range(n)]).T
......
from numpy import loadtxt
from pylab import figure, plot, subplot, show, axis, clf
from pylab import loadtxt, figure, plot, subplot, axis, clf, savefig
def cnvt(s):
    """Translate an Iris class label into a floating point code.

    Returns 0.0, 1.0 or 2.0 for 'Iris-setosa', 'Iris-versicolor' and
    'Iris-virginica' respectively, and -1.0 for any other label.
    """
    try:
        return {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0,
                'Iris-virginica': 2.0}[s]
    except KeyError:
        # Unknown labels map to -1.0 instead of raising.
        return -1.0
# The last column of the data sets is a label, which is used to distinguish the
# three groups of data in the data sets. This label should be translated to a
# floating point, or a conversion error will occur (since ``dtype=float'').
# Mapping from the textual class label to its floating point code.
cnvt_dict = {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, 'Iris-virginica': 2.0}
# Column 4 holds the label; labels missing from the mapping become -1.0.
data = loadtxt('iris.data', delimiter=',', dtype=float,
               converters={4: lambda s: cnvt_dict.get(s, -1.0)})
data = loadtxt('iris.data', delimiter=',', dtype=float, converters={4: cnvt})
# Transform the data set into, for each of the 16 subplots, three lists of
# (x, y) points: one list per label.
graph_data = [[[] for i in range(3)] for j in range(16)]
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4):
for j in range(4):
if i != j:
for d in data:
graph_data[i + j*4][int(d[4])].append((d[i], d[j]));
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4):
for j in range(4):
if i != j:
subplot(4, 4, (i+1) + j*4)
axis('equal')
# Plot the three data sets.
for c in range(3):
tmp = zip(*graph_data[i + j*4][c])
plot(tmp[0], tmp[1], 'x' + colors[c])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment