Algorithm comparison

Comparison: CLARANS vs FastCLARANS vs KMeans

Side-by-side comparison of CLARANS, FastCLARANS and scikit-learn’s KMeans on the same dataset.

 1"""Generate `comparison_clustering.png`.
 2
 3Side-by-side comparison of CLARANS, FastCLARANS and scikit-learn's KMeans.
 4"""
 5from pathlib import Path
 6import time
 7import matplotlib
 8matplotlib.use("Agg")
 9import matplotlib.pyplot as plt
10from sklearn.datasets import make_blobs
11from clarans import CLARANS, FastCLARANS
12from sklearn.cluster import KMeans
13
14
def main():
    """Fit each clustering model on one synthetic dataset and save a figure.

    Generates a 2-D blob dataset, fits CLARANS, FastCLARANS and KMeans on it,
    and draws one scatter panel per model, coloured by the labels the model
    assigned. Each panel title shows the model name and the wall-clock time
    spent in ``fit``. The figure is written to ``comparison_clustering.png``
    in the current working directory and the path is printed.
    """
    # One place for the values shared by the dataset and every model, so the
    # three estimators are always compared under the same settings.
    n_clusters = 3
    seed = 42

    X, _ = make_blobs(n_samples=500, centers=n_clusters, random_state=seed)
    models = [
        ("CLARANS", CLARANS(n_clusters=n_clusters, numlocal=5, random_state=seed)),
        ("FastCLARANS", FastCLARANS(n_clusters=n_clusters, numlocal=5, random_state=seed)),
        ("KMeans", KMeans(n_clusters=n_clusters, random_state=seed)),
    ]

    # Size the grid from the model list so adding or removing an entry cannot
    # silently drop a panel; squeeze=False keeps `axes` two-dimensional even
    # when only a single model is listed.
    n_models = len(models)
    fig, axes = plt.subplots(1, n_models, figsize=(5 * n_models, 4), squeeze=False)

    for ax, (name, model) in zip(axes[0], models):
        # Time only the fit call; plotting is excluded from the measurement.
        start = time.perf_counter()
        model.fit(X)
        elapsed = time.perf_counter() - start

        ax.scatter(X[:, 0], X[:, 1], c=model.labels_, s=20, cmap="tab10", alpha=0.8)

        # Models that do not expose `cluster_centers_` are drawn without
        # centre markers instead of raising.
        centers = getattr(model, "cluster_centers_", None)
        if centers is not None:
            ax.scatter(centers[:, 0], centers[:, 1], c="black", marker="x", s=100, linewidths=2)

        ax.set_title(f"{name}\n{elapsed:.3f}s")
        ax.set_xticks([])
        ax.set_yticks([])

    out = "comparison_clustering.png"
    try:
        fig.savefig(out, bbox_inches="tight", dpi=150)
    finally:
        # Release the figure even if saving fails, so repeated calls do not
        # accumulate open figures.
        plt.close(fig)
    print(f"Saved {out}")


if __name__ == "__main__":
    main()