Commit 512148cf authored by Taddeüs Kroes

StatRed ass4: Added some comments.

parent f6eeea9a
from pylab import loadtxt, array, scatter, figure, show, mean, argmin, \ from pylab import loadtxt, array, scatter, figure, show, mean, argmin, \
append, savefig append, savefig
from random import random, seed, randint from random import random, seed
from sys import argv, exit from sys import argv, exit
def init(X, k): def init(X, k):
...@@ -9,18 +9,21 @@ def init(X, k): ...@@ -9,18 +9,21 @@ def init(X, k):
def init_pp(X, k): def init_pp(X, k):
"""Use the k-means++ algorithm to find initial means.""" """Use the k-means++ algorithm to find initial means."""
# Choose first center at random # Choose first mean at random
N = X.shape[0] N = X.shape[0]
indices = [int(N * random())] indices = [int(N * random())]
m = [X[indices[0]]] m = [X[indices[0]]]
# Initial distances
D = [((x - m[0])**2).sum() for x in X] D = [((x - m[0])**2).sum() for x in X]
while len(m) < k: while len(m) < k:
# Find new best mean
best_sum = new = -1 best_sum = new = -1
for i in range(N): for i in range(N):
if i not in indices: if i not in indices:
Dsum = sum([min(D[j], ((X[j] - X[i])**2).sum()) for j in range(N)]) Dsum = sum([min(D[j], ((X[j] - X[i])**2).sum()) for j in range(N)])
if best_sum == -1 or best_sum < Dsum: if best_sum == -1 or best_sum < Dsum:
best_sum, new = Dsum, i best_sum, new = Dsum, i
# Add new mean and update distances
m.append(X[new]) m.append(X[new])
indices.append(new) indices.append(new)
D = [min(D[i], ((X[i] - X[new])**2).sum()) for i in range(N)] D = [min(D[i], ((X[i] - X[new])**2).sum()) for i in range(N)]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment