Loading W Code...
Indian Context
Hinglish sentiment analysis, regional language processing, chatbots
Concept Level: Beginner
# INTRODUCTION TO NLP
import numpy as np
print("="*60)
print("NLP: TEXT TO NUMBERS")
print("="*60)

# The fundamental challenge: a program only ever sees numbers, not words.
text = "Machine learning is amazing!"
print(f"\nOriginal text: '{text}'")

# ord() yields each character's code point (the same as its ASCII code for
# this plain-English sample). These numbers say nothing about word meaning.
ascii_codes = [ord(char) for char in text]
# Show only the first ten codes to keep the output short.
print(f"ASCII codes: {ascii_codes[:10]}...")

# We need meaningful representations
print("\n" + "="*60)
print("NLP PIPELINE EXAMPLE")
print("="*60)

# Small hand-written corpus: positive, negative and neutral reviews with
# punctuation, mixed case and emoji for the preprocessing demo to clean up.
sample_reviews = [
    "This movie is amazing! Best film of the year š¬",
    "Waste of time. Terrible acting. Don't watch! š",
    "Average movie. Nothing special but watchable.",
    "Mind-blowing performance by the lead actor! š„",
]

print("\nSample Reviews:")
# Number the reviews from 1 for human-friendly output.
for i, review in enumerate(sample_reviews, 1):
    print(f" {i}. {review}")
# Simple preprocessing
import re
def preprocess(text):
    """Normalize text to lowercase a-z words separated by single spaces.

    Every character other than the letters a-z and whitespace is deleted
    after lowercasing: digits, punctuation, emoji, and non-ASCII letters.
    Characters are removed rather than replaced with a space, so
    "Don't" becomes "dont" and "Mind-blowing" becomes "mindblowing".

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, with leading and trailing whitespace stripped
        and each internal run of whitespace collapsed to one space. An
        input with no a-z letters yields an empty string.
    """
    # Lowercase
    text = text.lower()
    # Remove emojis and special chars
    text = re.sub(r'[^a-z\s]', '', text)
    # Remove extra spaces: split() with no argument drops empty pieces, so
    # re-joining also trims both ends.
    text = ' '.join(text.split())
    return text
print("\nAfter Preprocessing:")
# Re-list the same reviews, numbered from 1, after cleaning each one.
for i, review in enumerate(sample_reviews, 1):
    print(f" {i}. {preprocess(review)}")

# Why preprocessing matters
print("\nš” Key Insight:")
print(" 'Amazing', 'AMAZING', 'amazing!' ā should all be the same")
print(" Preprocessing normalizes text for consistent analysis")