Commit 8abd27f0 authored by Taddeüs Kroes

StatRed ass4: Implemented part 1 (with very nice dataset generator :)).

parent 0b97547d
......@@ -69,10 +69,10 @@ for i in range(4):
facecolor=[1,1,1]*len(T))
scatter(XC[T, i], XC[T, j], s=30, marker='+',
edgecolor=color[c.astype(int) - 1])
from pylab import show
show()
#if method == 'knnb':
# filename = 'knnb-%d.pdf' % k
#else:
# filename = '%s.pdf' % method
#savefig(filename)
#from pylab import show
#show()
if method == 'knnb':
filename = 'knnb-%d.pdf' % k
else:
filename = '%s.pdf' % method
savefig(filename)
from pylab import loadtxt, array, scatter, figure, show, mean, argmin, \
ones, append, savefig
from random import random, seed
from sys import argv, exit
def init(X, k):
    """Return the initial means for plain k-means: the first k entries of X.

    X is only sliced, never modified. If X holds fewer than k entries the
    result is correspondingly shorter.
    """
    leading = X[0:k]
    return leading
def init_pp(X, k):
    """Choose initial means from X with k-means++ seeding.

    The first mean is drawn uniformly at random from X. Every further mean
    is drawn from X with probability proportional to the squared Euclidean
    distance between a point and the nearest mean chosen so far, so a point
    that coincides with an already chosen mean is never drawn again while
    other candidates remain.

    X -- array of points, one point per row (rows must support
         element-wise subtraction, as numpy rows do).
    k -- number of means wanted.

    Returns an array with one chosen point per row. Like plain init(), it
    never returns more rows than X has points, and returns an empty slice
    of X when k < 1 or X is empty.

    Randomness comes from random.random(), so the outcome is reproducible
    after random.seed().
    """
    n_points = len(X)

    if k < 1 or n_points == 0:
        return X[:0]

    # First mean: uniform draw.
    chosen = X[int(random() * n_points)]
    means = [chosen]

    # Squared distance from every point to its nearest chosen mean.
    dist2 = [((x - chosen) ** 2).sum() for x in X]

    while len(means) < min(k, n_points):
        total = sum(dist2)

        if total > 0:
            # Roulette-wheel selection weighted by squared distance.
            threshold = random() * total
            acc = 0.0
            pick = 0

            for idx, d in enumerate(dist2):
                if d > 0:
                    # Remember the last candidate with a nonzero weight so
                    # that floating-point rounding of the threshold can
                    # never leave us without a pick.
                    pick = idx
                    acc += d

                    if acc > threshold:
                        break
        else:
            # Every point coincides with a chosen mean (duplicate data):
            # no weighting is possible, fall back to a uniform draw.
            pick = int(random() * n_points)

        chosen = X[pick]
        means.append(chosen)
        dist2 = [min(d, ((x - chosen) ** 2).sum())
                 for d, x in zip(dist2, X)]

    return array(means)
# K-means clustering demo.
#
# Command line: python <script> K [ "pp" ]
#   K     number of clusters (also the number of blobs in the generated data)
#   "pp"  optional; seed the means with init_pp instead of init
usage = 'Usage: python %s K [ "pp" ]' % argv[0]

# K is mandatory, and the only accepted third argument is the literal "pp".
if len(argv) < 2 or (len(argv) == 3 and argv[2] != 'pp'):
    print(usage)
    exit(1)

# Reject a garbled K here instead of failing later with a traceback.
try:
    k = int(argv[1])
except ValueError:
    k = 0

# n: dimensions per point, N: total number of generated points
n, N = 2, 100

# k < 1 would divide by zero below; k > N would produce empty blobs that
# cannot be appended to the dataset.
if not 1 <= k <= N:
    print('K must be an integer between 1 and %d' % N)
    print(usage)
    exit(1)

if len(argv) == 3:
    print('Using k-means++')
    initial_means = init_pp
else:
    print('Using normal k-means')
    initial_means = init

# Generate dataset: k square blobs of side 100. The first blob sits at the
# origin and also receives the N % k left-over points; every other blob is
# shifted along the diagonal by a random offset d. The fixed seed makes the
# dataset reproducible.
seed(700)

X = array([[100 * random() for j in range(n)]
           for i in range(N // k + N % k)])

for _ in range(k - 1):
    d = (k + 1) * 100 * random()
    X = append(X, [[100 * random() + d for j in range(n)]
                   for i in range(N // k)], 0)

M = initial_means(X, k)
Mp = M - 1  # differs from M everywhere, so the loop runs at least once
steps = 0

# Divide in clusters: iterate until the means stop moving
while (Mp - M).any():
    Mp = M
    clusters = [[] for i in range(k)]

    # Assignment step: each point joins the cluster of its nearest mean
    for x in X:
        clusters[argmin([((x - m) ** 2).sum() for m in M])].append(x)

    # Update step: move each mean to the centroid of its cluster. An empty
    # cluster keeps its previous mean; mean([]) would be NaN, and a NaN in M
    # keeps (Mp - M).any() true forever.
    M = array([mean(members, 0) if members else Mp[i]
               for i, members in enumerate(clusters)])
    steps += 1

print('Completed in %d steps' % steps)

# Plot clusters
figure(1)

# The first three colours are the original palette; the palette is cycled
# so that any K can be drawn.
colors = [[1, 0, 0], [0, 1, 0], [0, 0, 1],
          [1, 1, 0], [1, 0, 1], [0, 1, 1], [0, 0, 0]]

for i in range(k):
    if not clusters[i]:
        # Nothing to draw, and indexing columns of an empty array fails.
        continue

    c = array(clusters[i])

    # A single-row 2D colour is unambiguous; a bare [r, g, b] is mistaken
    # for per-point scalar values when the cluster has exactly 3 points.
    scatter(c[:, 0], c[:, 1], c=[colors[i % len(colors)]])

savefig('k-means.pdf')
show()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment