"""Naive k-means clustering (Lloyd's algorithm) plus a small random demo."""
import math
import random


def centroid(dots):
    """Return the coordinate-wise arithmetic mean of *dots* as a tuple.

    *dots* is an iterable of equal-length coordinate sequences.  An empty
    input yields the empty tuple ``()``.
    """
    # float() forces true division even on interpreters where int / int
    # truncates, so integer coordinates do not produce truncated means.
    return tuple(sum(axis) / float(len(axis)) for axis in zip(*dots))


def distance(a, b):
    """Return the Euclidean distance between points *a* and *b*.

    Coordinates are paired with ``zip``, so if the points have different
    lengths the surplus coordinates of the longer one are ignored.
    """
    total = 0
    for ai, bi in zip(a, b):
        diff = ai - bi
        total += diff * diff
    return math.sqrt(total)


def kmeans(dots, k=None):
    """Partition *dots* into *k* clusters with Lloyd's algorithm.

    Args:
        dots: sequence of points (equal-length coordinate tuples).
        k: number of clusters.  Defaults to ``int(sqrt(n / 2))`` but never
            less than 1 for non-empty input.

    Returns:
        A list of *k* lists of points.  A cluster may be empty when several
        initial means coincide (e.g. duplicate points).  Empty input with no
        (or zero) *k* returns ``[]``.

    Raises:
        ValueError: if *k* is smaller than 1 for non-empty input, or larger
            than ``len(dots)`` (raised by ``random.sample``).
    """
    n = len(dots)
    if n == 0 and not k:
        return []  # nothing to cluster
    if k is None:
        # The rule of thumb gives 0 for a single point; clamp so there is
        # always at least one mean to assign points to.
        k = max(1, int(math.sqrt(n / 2.0)))
    elif k < 1:
        raise ValueError("k must be a positive integer")

    # Seed the means with k distinct positions drawn from the input.
    means = random.sample(dots, k)
    while True:
        clusters = [[] for _ in range(k)]
        for dot in dots:
            distances = [distance(dot, mean) for mean in means]
            # index(min(...)) resolves ties in favour of the lowest index.
            clusters[distances.index(min(distances))].append(dot)

        # An empty cluster keeps its previous mean.  Taking the centroid of
        # nothing would give (), which is at distance 0 from every point and
        # would swallow the whole data set (and can keep the loop from ever
        # converging).
        new_means = [
            centroid(cluster) if cluster else mean
            for cluster, mean in zip(clusters, means)
        ]
        if new_means == means:
            return clusters
        means = new_means


def _main():
    """Cluster a random point cloud and print one cluster per line."""
    d = random.randint(1, 5)        # dimensionality
    n = random.randint(10, 100)     # number of points
    dots = [tuple(random.randint(0, 99) for _ in range(d)) for _ in range(n)]
    for cluster in kmeans(dots):
        print(cluster)


if __name__ == "__main__":
    _main()
Standard input is empty
[(6, 11), (19, 41), (3, 4), (22, 43), (1, 19), (40, 14), (20, 46), (32, 8), (21, 0), (42, 5), (15, 19), (26, 29), (8, 34), (5, 11), (26, 10), (13, 23), (13, 41), (31, 1), (36, 13), (0, 28), (35, 13), (41, 18)] [(79, 11), (79, 3), (57, 3), (69, 7), (81, 11), (89, 2)] [(76, 75), (67, 94), (78, 77), (91, 74), (52, 97), (82, 61), (72, 99), (99, 97), (64, 92), (72, 62), (84, 97), (90, 95), (97, 73)] [(59, 60), (70, 45), (47, 24), (40, 37), (66, 44), (57, 60), (48, 22), (55, 41), (57, 54), (47, 58)] [(33, 62), (23, 90), (22, 85), (20, 75), (7, 66), (29, 59), (43, 92), (16, 77), (24, 47), (20, 73), (5, 90), (16, 90), (45, 78), (31, 75), (12, 80), (12, 76)] [(87, 37), (87, 26), (65, 21), (79, 33), (65, 30), (92, 25), (72, 26), (74, 25), (89, 49)]