fork download
  1. import numpy as np
  2.  
  3. class KMeans:
  4. def __init__(self, n_clusters, max_iter=300):
  5. self.n_clusters = n_clusters
  6. self.max_iter = max_iter
  7. self.centroids = []
  8.  
  9. def fit(self, X):
  10. # Initialize centroids randomly
  11. self.centroids = X[np.random.choice(X.shape[0], self.n_clusters, replace=False)]
  12.  
  13. for _ in range(self.max_iter):
  14. # Assign each data point to the nearest centroid
  15. labels = self._assign_clusters(X)
  16.  
  17. # Update centroids based on the mean of the points in each cluster
  18. new_centroids = []
  19. for i in range(self.n_clusters):
  20. cluster_points = X[labels == i]
  21. if len(cluster_points) > 0:
  22. new_centroids.append(np.mean(cluster_points, axis=0))
  23. else:
  24. new_centroids.append(self.centroids[i]) # Keep the centroid unchanged if the cluster is empty
  25. new_centroids = np.array(new_centroids)
  26.  
  27. # Check for convergence
  28. if np.allclose(self.centroids, new_centroids):
  29. break
  30.  
  31. self.centroids = new_centroids
  32.  
  33. def _assign_clusters(self, X):
  34. distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
  35. return np.argmin(distances, axis=1)
  36.  
  37. # Example usage:
  38. # Generate some random data
  39. np.random.seed(42)
  40. data = np.random.rand(100, 2)
  41.  
  42. # Initialize and fit KMeans
  43. kmeans = KMeans(n_clusters=3)
  44. kmeans.fit(data)
  45.  
  46. # Get the cluster centroids
  47. centroids = kmeans.centroids
  48. print("Cluster centroids:")
  49. print(centroids)
  50.  
Success #stdin #stdout 0.25s 28892KB
stdin
Standard input is empty
stdout
Cluster centroids:
[[0.8039633  0.57026999]
 [0.18520943 0.72228065]
 [0.36376248 0.20008043]]