fork download
  1. import math
  2. import random
  3.  
  4. centroid = lambda dots: tuple(map(lambda seq: sum(seq) / len(seq), zip(*dots)))
  5. distance = lambda a, b: math.sqrt(sum((ai - bi) * (ai - bi) for ai, bi in zip(a, b)))
  6.  
  7. def kmeans(dots, k=None):
  8. n = len(dots)
  9. if k is None:
  10. k = int(math.sqrt(n / 2))
  11. means = random.sample(dots, k)
  12. while True:
  13. clusters = [[] for i in range(k)]
  14. for dot in dots:
  15. distances = [distance(dot, mean) for mean in means]
  16. i = distances.index(min(distances))
  17. clusters[i].append(dot)
  18. newmeans = [centroid(cluster) for cluster in clusters]
  19. if newmeans == means:
  20. break
  21. else:
  22. means = newmeans
  23. return clusters
  24.  
  25. if __name__ == "__main__":
  26. d = random.randint(1, 5)
  27. n = random.randint(10, 100)
  28. dots = [tuple(random.randint(0, 99) for i in range(d)) for j in range(n)]
  29. clusters = kmeans(dots)
  30. for cluster in clusters:
  31. print cluster
  32.  
Success #stdin #stdout 0.04s 5448KB
stdin
Standard input is empty
stdout
[(6, 11), (19, 41), (3, 4), (22, 43), (1, 19), (40, 14), (20, 46), (32, 8), (21, 0), (42, 5), (15, 19), (26, 29), (8, 34), (5, 11), (26, 10), (13, 23), (13, 41), (31, 1), (36, 13), (0, 28), (35, 13), (41, 18)]
[(79, 11), (79, 3), (57, 3), (69, 7), (81, 11), (89, 2)]
[(76, 75), (67, 94), (78, 77), (91, 74), (52, 97), (82, 61), (72, 99), (99, 97), (64, 92), (72, 62), (84, 97), (90, 95), (97, 73)]
[(59, 60), (70, 45), (47, 24), (40, 37), (66, 44), (57, 60), (48, 22), (55, 41), (57, 54), (47, 58)]
[(33, 62), (23, 90), (22, 85), (20, 75), (7, 66), (29, 59), (43, 92), (16, 77), (24, 47), (20, 73), (5, 90), (16, 90), (45, 78), (31, 75), (12, 80), (12, 76)]
[(87, 37), (87, 26), (65, 21), (79, 33), (65, 30), (92, 25), (72, 26), (74, 25), (89, 49)]