Commit b5d82c8c authored by Taddeüs Kroes's avatar Taddeüs Kroes

Merge branch 'master' of ssh://vo20.nl/home/git/repos/uva

parents 8293e6a3 bae9b637
CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -O0 CFLAGS=-Wall -Wextra -pedantic -std=c99 -D_GNU_SOURCE -g -ggdb -O0
LDFLAGS=-lm LDFLAGS=-lm
PROGS=test main PROGS=test main
......
.PHONY: all clean
all:
clean:
rm -vf *.pyc q*.pdf
from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \ from pylab import array, eig, diagflat, dot, sqrt, randn, tile, \
plot, subplot, axis, figure, clf, savefig plot, subplot, axis, figure, clf, savefig
# The mean vector (mu) and covariance matrix (cov) used for the dataset.
mu = array([[3], mu = array([[3],
[4], [4],
[5], [5],
[6]]) [6]])
cov = array( cov = array(
[[ 3.01602775, 1.02746769, -3.60224613, -2.08792829], [[ 3.01602775, 1.02746769, -3.60224613, -2.08792829],
...@@ -12,10 +13,14 @@ cov = array( ...@@ -12,10 +13,14 @@ cov = array(
[-3.60224613, -3.98616664, 13.04508284, -1.59255406], [-3.60224613, -3.98616664, 13.04508284, -1.59255406],
[-2.08792829, 0.48723704, -1.59255406, 8.28742469]]) [-2.08792829, 0.48723704, -1.59255406, 8.28742469]])
# `samples' is the constant `N': the total number of samples to generate
# according to the normal distribution.
samples = 1000 samples = 1000
vector_size = 4 vector_size = 4
def dataset(): def dataset():
# The covariance matrix is used to transform the generated dataset into a
# multivariate normal distribution dataset.
d, U = eig(cov) d, U = eig(cov)
L = diagflat(d) L = diagflat(d)
A = dot(U, sqrt(L)) A = dot(U, sqrt(L))
...@@ -23,11 +28,13 @@ def dataset(): ...@@ -23,11 +28,13 @@ def dataset():
return dot(A,X) + tile(mu, samples) return dot(A,X) + tile(mu, samples)
if __name__ == '__main__': if __name__ == '__main__':
# Create an n*n grid of subplots and generate a new dataset.
figure(vector_size**2) figure(vector_size**2)
clf() clf()
Y = dataset() Y = dataset()
for i in range(vector_size): for i in range(vector_size):
for j in range(vector_size): for j in range(vector_size):
# Skip the diagonal subplots since those are irrelevant.
if i != j: if i != j:
subplot(vector_size, vector_size, (i+1) + j*vector_size) subplot(vector_size, vector_size, (i+1) + j*vector_size)
plot(Y[i], Y[j], 'x') plot(Y[i], Y[j], 'x')
......
from q21_multivariate import dataset from q21_multivariate import dataset
from numpy import array, mean, tile, newaxis, dot from pylab import array, mean, tile, newaxis, dot, eigvals, \
from pylab import eigvals, diagflat, axis, figure, clf, show, plot, subplot axis, figure, clf, show, plot
def eigenvalues(n): def eigenvalues(n):
"""Return eigenvalues of unbiased estimators for the covariance matrix
Sigma (based on a pseudo-random generated dataset)."""
Y = array([mean(dataset(), 1) for i in range(n)]).T Y = array([mean(dataset(), 1) for i in range(n)]).T
# Sigma = 1 / (n - 1) * Sum for i=1 to n: (x_i - x_mean) T(x_i - x_mean),
# where T(x) is the transpose of `x'. Mu = x_mean and Yzm is the matrix
# whose i-th column is x_i - x_mean, so Sigma = Yzm T(Yzm) / (n - 1).
mu = mean(Y, 1) mu = mean(Y, 1)
Yzm = Y - tile(mu[:,newaxis], n) Yzm = Y - tile(mu[:,newaxis], n)
S = dot(Yzm, Yzm.T) / (n - 1) S = dot(Yzm, Yzm.T) / (n - 1)
...@@ -11,14 +18,17 @@ def eigenvalues(n): ...@@ -11,14 +18,17 @@ def eigenvalues(n):
figure(1) figure(1)
clf() clf()
samples = range(2, 10000, 500)
max_range = 10000
samples = range(2, max_range, 500)
data = [[] for i in range(4)] data = [[] for i in range(4)]
for n in samples: for n in samples:
e = eigenvalues(n) e = eigenvalues(n)
for i in range(4): for i in range(4):
data[i].append(e[i]) data[i].append(e[i])
for i in range(4): for i in range(4):
#subplot(2, 2, i+1)
plot(samples, data[i], 'x') plot(samples, data[i], 'x')
axis([0, 10000, 0., 0.025]) axis([0, max_range, 0., 0.025])
show() show()
from numpy import loadtxt from pylab import loadtxt, figure, plot, subplot, axis, clf, savefig
from pylab import figure, plot, subplot, show, axis, clf
def cnvt(s): # The last column of the data sets is a label, which is used to distinguish the
try: # three groups of data in the data sets. This label should be translated to a
return {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, \ # floating point, or a conversion error will occur (since ``dtype=float'').
'Iris-virginica': 2.0}[s] cnvt_dict = {'Iris-setosa': 0.0, 'Iris-versicolor': 1.0, 'Iris-virginica': 2.0}
except KeyError: data = loadtxt('iris.data', delimiter=',', dtype=float, \
ireturn -1.0 converters={4: lambda s: not s in cnvt_dict and -1.0 or cnvt_dict[s]})
data = loadtxt('iris.data', delimiter=',', dtype=float, converters={4: cnvt}) # Transform the data set into per-subplot lists of (x, y) points, grouped by label.
graph_data = [[[] for i in range(3)] for j in range(16)] graph_data = [[[] for i in range(3)] for j in range(16)]
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4): for i in range(4):
for j in range(4): for j in range(4):
if i != j: if i != j:
for d in data: for d in data:
graph_data[i + j*4][int(d[4])].append((d[i], d[j])); graph_data[i + j*4][int(d[4])].append((d[i], d[j]));
colors = ['r', 'g', 'b']
figure(16)
clf()
for i in range(4): for i in range(4):
for j in range(4): for j in range(4):
if i != j: if i != j:
subplot(4, 4, (i+1) + j*4) subplot(4, 4, (i+1) + j*4)
axis('equal') axis('equal')
# Plot the three data sets.
for c in range(3): for c in range(3):
tmp = zip(*graph_data[i + j*4][c]) tmp = zip(*graph_data[i + j*4][c])
plot(tmp[0], tmp[1], 'x' + colors[c]) plot(tmp[0], tmp[1], 'x' + colors[c])
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment