import numpy as np
class KMeans:
    """K-means clustering by alternating assignment and mean-update steps.

    Attributes:
        n_clusters: Number of clusters (and centroids) to fit.
        max_iter: Upper bound on the number of assign/update iterations.
        centroids: Empty list before ``fit``; afterwards an array with one
            row per cluster holding that cluster's centre.
    """

    def __init__(self, n_clusters, max_iter=300):
        """Store the clustering hyper-parameters; no computation happens here."""
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.centroids = []

    def fit(self, X):
        """Fit centroids to the samples in ``X``.

        Initial centroids are ``n_clusters`` distinct rows of ``X`` drawn with
        NumPy's global random state, so call ``np.random.seed`` beforehand for
        reproducible results.

        Args:
            X: 2-D array-like with one sample per row. Anything accepted by
                ``np.asarray`` works (e.g. a list of lists).

        Returns:
            This instance, to allow ``KMeans(...).fit(X).centroids``.

        Raises:
            ValueError: If ``X`` is not 2-D, or ``n_clusters`` is not between
                1 and the number of samples.
        """
        # Accept plain Python sequences as well as ndarrays.
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)"
            )

        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters must be between 1 and n_samples={n_samples}; "
                f"got {self.n_clusters}"
            )

        # Initialize centroids from distinct randomly chosen samples.
        seed_rows = np.random.choice(n_samples, self.n_clusters, replace=False)
        self.centroids = X[seed_rows]

        for _ in range(self.max_iter):
            # Assign each data point to the nearest centroid.
            labels = self._assign_clusters(X)

            # Move each centroid to the mean of the points assigned to it.
            updated = []
            for i in range(self.n_clusters):
                members = X[labels == i]
                if len(members) > 0:
                    updated.append(members.mean(axis=0))
                else:
                    # Keep the centroid unchanged if the cluster is empty.
                    updated.append(self.centroids[i])
            new_centroids = np.array(updated)

            # Store the new means before stopping so the final centroids are
            # the actual means of the final assignment, not the previous ones.
            converged = np.allclose(self.centroids, new_centroids)
            self.centroids = new_centroids
            if converged:
                break

        return self

    def _assign_clusters(self, X):
        """Return, for each row of ``X``, the index of its nearest centroid.

        Distances are Euclidean norms; ``np.argmin`` resolves ties in favour
        of the lowest centroid index.
        """
        # Broadcast to (n_samples, n_clusters, n_features) and reduce over features.
        distances = np.linalg.norm(X[:, np.newaxis] - self.centroids, axis=2)
        return np.argmin(distances, axis=1)
# Example usage:
# Generate some random data. Seeding NumPy's global RNG makes the generated
# points reproducible from run to run.
np.random.seed(42)
data = np.random.rand(100, 2)

# Initialize and fit KMeans
kmeans = KMeans(n_clusters=3)
kmeans.fit(data)

# Get the cluster centroids
centroids = kmeans.centroids
print("Cluster centroids:")
print(centroids)
aW1wb3J0IG51bXB5IGFzIG5wCgpjbGFzcyBLTWVhbnM6CiAgICBkZWYgX19pbml0X18oc2VsZiwgbl9jbHVzdGVycywgbWF4X2l0ZXI9MzAwKToKICAgICAgICBzZWxmLm5fY2x1c3RlcnMgPSBuX2NsdXN0ZXJzCiAgICAgICAgc2VsZi5tYXhfaXRlciA9IG1heF9pdGVyCiAgICAgICAgc2VsZi5jZW50cm9pZHMgPSBbXQoKICAgIGRlZiBmaXQoc2VsZiwgWCk6CiAgICAgICAgIyBJbml0aWFsaXplIGNlbnRyb2lkcyByYW5kb21seQogICAgICAgIHNlbGYuY2VudHJvaWRzID0gWFtucC5yYW5kb20uY2hvaWNlKFguc2hhcGVbMF0sIHNlbGYubl9jbHVzdGVycywgcmVwbGFjZT1GYWxzZSldCgogICAgICAgIGZvciBfIGluIHJhbmdlKHNlbGYubWF4X2l0ZXIpOgogICAgICAgICAgICAjIEFzc2lnbiBlYWNoIGRhdGEgcG9pbnQgdG8gdGhlIG5lYXJlc3QgY2VudHJvaWQKICAgICAgICAgICAgbGFiZWxzID0gc2VsZi5fYXNzaWduX2NsdXN0ZXJzKFgpCgogICAgICAgICAgICAjIFVwZGF0ZSBjZW50cm9pZHMgYmFzZWQgb24gdGhlIG1lYW4gb2YgdGhlIHBvaW50cyBpbiBlYWNoIGNsdXN0ZXIKICAgICAgICAgICAgbmV3X2NlbnRyb2lkcyA9IFtdCiAgICAgICAgICAgIGZvciBpIGluIHJhbmdlKHNlbGYubl9jbHVzdGVycyk6CiAgICAgICAgICAgICAgICBjbHVzdGVyX3BvaW50cyA9IFhbbGFiZWxzID09IGldCiAgICAgICAgICAgICAgICBpZiBsZW4oY2x1c3Rlcl9wb2ludHMpID4gMDoKICAgICAgICAgICAgICAgICAgICBuZXdfY2VudHJvaWRzLmFwcGVuZChucC5tZWFuKGNsdXN0ZXJfcG9pbnRzLCBheGlzPTApKQogICAgICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgICAgICBuZXdfY2VudHJvaWRzLmFwcGVuZChzZWxmLmNlbnRyb2lkc1tpXSkgICMgS2VlcCB0aGUgY2VudHJvaWQgdW5jaGFuZ2VkIGlmIHRoZSBjbHVzdGVyIGlzIGVtcHR5CiAgICAgICAgICAgIG5ld19jZW50cm9pZHMgPSBucC5hcnJheShuZXdfY2VudHJvaWRzKQoKICAgICAgICAgICAgIyBDaGVjayBmb3IgY29udmVyZ2VuY2UKICAgICAgICAgICAgaWYgbnAuYWxsY2xvc2Uoc2VsZi5jZW50cm9pZHMsIG5ld19jZW50cm9pZHMpOgogICAgICAgICAgICAgICAgYnJlYWsKCiAgICAgICAgICAgIHNlbGYuY2VudHJvaWRzID0gbmV3X2NlbnRyb2lkcwoKICAgIGRlZiBfYXNzaWduX2NsdXN0ZXJzKHNlbGYsIFgpOgogICAgICAgIGRpc3RhbmNlcyA9IG5wLmxpbmFsZy5ub3JtKFhbOiwgbnAubmV3YXhpc10gLSBzZWxmLmNlbnRyb2lkcywgYXhpcz0yKQogICAgICAgIHJldHVybiBucC5hcmdtaW4oZGlzdGFuY2VzLCBheGlzPTEpCgojIEV4YW1wbGUgdXNhZ2U6CiMgR2VuZXJhdGUgc29tZSByYW5kb20gZGF0YQpucC5yYW5kb20uc2VlZCg0MikKZGF0YSA9IG5wLnJhbmRvbS5yYW5kKDEwMCwgMikKCiMgSW5pdGlhbGl6ZSBhbmQgZml0IEtNZWFucwprbWVhbnMgPSBLTWVhbnMobl9jbHVzdGVycz0zKQprbWVhbnMuZml0KGRhdGEpCgojIEdldCB0aGUgY2x1c3Rl
ciBjZW50cm9pZHMKY2VudHJvaWRzID0ga21lYW5zLmNlbnRyb2lkcwpwcmludCgiQ2x1c3RlciBjZW50cm9pZHM6IikKcHJpbnQoY2VudHJvaWRzKQo=