Commit b5d82c8c authored by Taddeüs Kroes

Merge branch 'master' of ssh://vo20.nl/home/git/repos/uva

parents 8293e6a3 bae9b637
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -O0
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -ggdb -O0
LDFLAGS=-lm
PROGS=test main
......
.PHONY: all clean
all:
clean:
rm -vf *.pyc q*.pdf
from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \
plot, subplot, axis, figure, clf, savefig
# The used mu (mean vector) and cov (covariance matrix).
mu = array([[3],
[4],
[5],
......@@ -12,10 +13,14 @@ cov = array(
[-3.60224613, -3.98616664, 13.04508284, -1.59255406],
[-2.08792829, 0.48723704, -1.59255406, 8.28742469]])
# `samples' is the constant `N': the number of sample vectors to generate
# according to the normal distribution.
samples = 1000
vector_size = 4
def dataset():
# The covariance matrix is used to transform the generated dataset into a
# multivariate normally distributed dataset.
d, U = eig(cov)
L = diagflat(d)
A = dot(U, sqrt(L))
......@@ -23,11 +28,13 @@ def dataset():
return dot(A,X) + tile(mu, samples)
if __name__ == '__main__':
# Create an n*n grid of subplots and generate a new dataset.
figure(vector_size**2)
clf()
Y = dataset()
for i in range(vector_size):
for j in range(vector_size):
# Skip the diagonal subplots since those are irrelevant.
if i != j:
subplot(vector_size, vector_size, (i+1) + j*vector_size)
plot(Y[i], Y[j], 'x')
......
from q21_multivariate import dataset
from numpy import array, mean, tile, newaxis, dot
from pylab import eigvals, diagflat, axis, figure, clf, show, plot, subplot
from pylab import array, mean, tile, newaxis, dot, eigvals, \
axis, figure, clf, show, plot
def eigenvalues(n):
"""Return eigenvalues of unbiased estimators for the covariance matrix
Sigma (based on a pseudo-random generated dataset)."""
Y = array([mean(dataset(), 1) for i in range(n)]).T
# Sigma = 1 / (n - 1) * Sum for i=1 to n: (x_i - x_mean) T(x_i - x_mean),
# where T(x) is the transpose of `x' and Mu = x_mean. Yzm is the zero-mean
# data matrix whose i-th column is x_i - x_mean.
mu = mean(Y, 1)
Yzm = Y - tile(mu[:,newaxis], n)
S = dot(Yzm, Yzm.T) / (n - 1)
......@@ -11,14 +18,17 @@ def eigenvalues(n):
figure(1)
clf()
samples = range(2, 10000, 500)
max_range = 10000
samples = range(2, max_range, 500)
data = [[] for i in range(4)]
for n in samples:
e = eigenvalues(n)
for i in range(4):
data[i].append(e[i])
for i in range(4):
#subplot(2, 2, i+1)
plot(samples, data[i], 'x')
axis([0, 10000, 0., 0.025])
axis([0, max_range, 0., 0.025])
show()
from numpy import loadtxt
from pylab import figure, plot, subplot, show, axis, clf
from pylab import loadtxt, figure, plot, subplot, axis, clf, savefig
def cnvt(s):
    """Translate an Iris class label into a floating point class index.

    Returns 0.0 for 'Iris-setosa', 1.0 for 'Iris-versicolor' and 2.0 for
    'Iris-virginica'. Any other label yields -1.0.
    """
    # Some numpy versions pass bytes rather than str to loadtxt converters;
    # decode those so the lookup below still matches the label.
    if isinstance(s, bytes):
        s = s.decode('latin1')

    labels = {
        'Iris-setosa': 0.0,
        'Iris-versicolor': 1.0,
        'Iris-virginica': 2.0,
    }

    # Unknown labels map to -1.0 instead of raising.
    return labels.get(s, -1.0)
# The last column of the data sets is a label, which is used to distinguish the
# three groups of data in the data sets. This label should be translated to a
# floating point, or a conversion error will occur (since ``dtype=float'').
cnvt_dict = {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, 'Iris-virginica': 2.0}
data = loadtxt('iris.data', delimiter=',', dtype=float, \
converters={4: lambda s: not s in cnvt_dict and -1.0 or cnvt_dict[s]})
data = loadtxt('iris.data', delimiter=',', dtype=float, converters={4: cnvt})
# Transform the data set into per-subplot lists of (x, y) points, grouped by
# class label.
graph_data = [[[] for i in range(3)] for j in range(16)]
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4):
for j in range(4):
if i != j:
for d in data:
graph_data[i + j*4][int(d[4])].append((d[i], d[j]));
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4):
for j in range(4):
if i != j:
subplot(4, 4, (i+1) + j*4)
axis('equal')
# Plot the three data sets.
for c in range(3):
tmp = zip(*graph_data[i + j*4][c])
plot(tmp[0], tmp[1], 'x' + colors[c])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment