Key classification metrics at a glance:
• Precision = TP / (TP + FP)
• Recall = TP / (TP + FN)
• F1 = 2PR / (P + R)
• AUC: 1 = perfect, 0.5 = random
Concept Level: Beginner
# CONFUSION MATRIX BASICS
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Build a synthetic, heavily skewed binary dataset (fraud-like: ~5% positives).
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    weights=[0.95, 0.05],  # class priors: 95% negative / 5% positive
    random_state=42,
)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

banner = "=" * 60
print(banner)
print("IMBALANCED DATASET EXAMPLE")
print(banner)
print(f"Class distribution: {np.bincount(y)}")

# Report how rare the positive (fraud) class is.
n_pos = sum(y)
print(f"Positive class (fraud): {n_pos} ({n_pos/len(y)*100:.1f}%)")
# Fit a plain logistic-regression baseline and score the held-out split.
model = LogisticRegression(random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)
# Confusion matrix: rows are actual classes, columns are predicted classes.
# For binary labels {0, 1} the layout is:
#   cm[0,0]=TN  cm[0,1]=FP
#   cm[1,0]=FN  cm[1,1]=TP
cm = confusion_matrix(y_test, y_pred)
# NOTE: the header/constant lines below carried a pointless f-prefix
# (no placeholders); plain string literals are the idiomatic form.
print("\nConfusion Matrix:")
print(" Predicted")
print(" Neg Pos")
print(f"Actual Neg | {cm[0,0]:4d} {cm[0,1]:4d} | TN, FP")
print(f"Actual Pos | {cm[1,0]:4d} {cm[1,1]:4d} | FN, TP")

# Accuracy = (TN + TP) / total. On a 95/5 class split this is misleading:
# predicting "negative" for every sample already scores ~95%.
accuracy = (cm[0,0] + cm[1,1]) / cm.sum()
print(f"\nAccuracy: {accuracy:.1%}")
print("⚠️ Looks good but may miss most frauds!")
# Render the confusion matrix as an annotated heatmap.
fig, ax = plt.subplots(figsize=(8, 6))
display = ConfusionMatrixDisplay(cm, display_labels=['Not Fraud', 'Fraud'])
display.plot(ax=ax, cmap='Blues')
plt.title('Confusion Matrix - Fraud Detection')
plt.show()