Loading W Code...
6
Topics
sklearn
Implementation
🏡 Indian Context
We predict Bangalore house prices using a mix of useful features (Area, Bedrooms) and junk features (Owner's Zodiac, Wall Color) — Lasso automatically shrinks the coefficients of the junk features to exactly zero, which effectively removes them from the model!
Problem Statement
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
# --- Synthetic data: a truly linear relationship plus Gaussian noise ---
# Seeding NumPy's global RNG keeps the generated data identical across runs.
np.random.seed(42)
X = np.random.rand(30, 1) * 4 - 2  # 30 samples, one feature, uniform in [-2, 2)
y = 2 * X.ravel() + np.random.randn(30) * 0.5  # y = 2x + noise (std 0.5)

# Hold out 30% of the samples so generalization can be measured separately.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print("=" * 60)
print("🔴 DEMONSTRATING OVERFITTING")
print("=" * 60)

# Overfit on purpose: expand the single feature into a high-degree polynomial
# basis even though the underlying relationship is linear.
# include_bias=False: LinearRegression already fits an intercept, so the
# constant column PolynomialFeatures adds by default is redundant. Its
# coefficient comes out as 0, which would make the "Min coefficient" line
# below always report 0.00 regardless of the actual fit.
poly = PolynomialFeatures(degree=15, include_bias=False)
X_train_poly = poly.fit_transform(X_train)
# Only transform the test split; the expansion is fitted on training data.
X_test_poly = poly.transform(X_test)

overfit_model = LinearRegression()
overfit_model.fit(X_train_poly, y_train)

# R² on the data the model was fitted to vs. on the held-out data.
train_r2 = overfit_model.score(X_train_poly, y_train)
test_r2 = overfit_model.score(X_test_poly, y_test)

# Report the degree from the transformer itself so the message cannot drift
# from the model configuration.
print(f"\nPolynomial Degree: {poly.degree} (Way too complex!)")
print(f"Training R²: {train_r2:.4f} (Almost perfect!)")
print(f"Test R²: {test_r2:.4f} (Terrible!)")
print(f"\n⚠️ Gap of {abs(train_r2 - test_r2):.2f} = OVERFITTING SIGNAL")

# Check coefficient magnitudes (absolute values, so sign is ignored).
coef_magnitudes = np.abs(overfit_model.coef_)
print("\n📊 COEFFICIENT ANALYSIS:")
print(f"Max coefficient: {np.max(coef_magnitudes):.2f}")
print(f"Min coefficient: {np.min(coef_magnitudes):.2f}")
print("\n💡 Huge coefficients cause unstable predictions!")