Loading W Code...
7
Topics
sklearn
Implementation
Indian Context
Customer segmentation for Flipkart, Zomato restaurant grouping, Ola driver clustering
Concept Level: Beginner
# CLUSTERING: THE UNSUPERVISED APPROACH
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# ---------------------------------------------------------------------------
# 1. Synthetic data - a stand-in for customer spending patterns.
#    300 points drawn around 4 hidden centres. The global NumPy seed is set
#    in addition to make_blobs' own random_state.
# ---------------------------------------------------------------------------
np.random.seed(42)
X, true_labels = make_blobs(n_samples=300, centers=4,
                            cluster_std=0.8, random_state=42)

# ---------------------------------------------------------------------------
# 2. Side-by-side scatter plots of the first two feature columns.
#    Each entry: (scatter keyword arguments, title, x-label, y-label).
#    Panel 1 shows the points with no label information (uniform gray);
#    panel 2 colours the same points by the labels make_blobs returned,
#    i.e. the structure a clustering algorithm is meant to recover.
# ---------------------------------------------------------------------------
panel_specs = (
    (
        dict(c='gray', alpha=0.6, edgecolor='black'),
        'Raw Data (No Labels)\nCan you see the clusters?',
        'Feature 1 (e.g., Annual Spending)',
        'Feature 2 (e.g., Frequency)',
    ),
    (
        dict(c=true_labels, cmap='viridis', edgecolor='black', alpha=0.7),
        'True Clusters (Hidden Structure)\nClustering reveals this!',
        'Feature 1',
        'Feature 2',
    ),
)

plt.figure(figsize=(12, 5))
for panel_no, (scatter_opts, heading, x_text, y_text) in enumerate(panel_specs, start=1):
    plt.subplot(1, 2, panel_no)
    plt.scatter(X[:, 0], X[:, 1], **scatter_opts)
    plt.title(heading)
    plt.xlabel(x_text)
    plt.ylabel(y_text)
plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# 3. Console summary of the two objectives of clustering.
#    Nothing is computed here - the lines are printed verbatim.
# ---------------------------------------------------------------------------
rule = "=" * 50
for text in (
    "\n" + rule,
    "CLUSTERING GOAL",
    rule,
    "✅ Minimize INTRA-cluster distance (tight groups)",
    "✅ Maximize INTER-cluster distance (well-separated)",
):
    print(text)