ML: A system that is not explicitly programmed; instead it learns patterns from data and uses them to make predictions.
Types:
Supervised Learning: Labeled examples → predict output
Unsupervised Learning: Unlabeled data → find patterns
Reinforcement Learning: Agent + environment + rewards
Semi-supervised: Few labeled + many unlabeled
Self-supervised: Labels generated from data itself (BERT, GPT)
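A quick sketch of the supervised vs unsupervised contrast, using scikit-learn's built-in iris data purely for illustration:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.cluster import KMeans

X, y = load_iris(return_X_y=True)

# Supervised: features X AND labels y are given, model learns X → y
clf = LogisticRegression(max_iter=1000).fit(X, y)
print(clf.predict(X[:5]))          # predicted classes

# Unsupervised: only X is given, model finds structure on its own
km = KMeans(n_clusters=3, n_init=10, random_state=42).fit(X)
print(km.labels_[:5])              # discovered cluster ids (no ground truth used)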
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# y = w₀ + w₁x₁ + w₂x₂ + ... + wₙxₙ
# Cost: MSE = (1/n)Σ(yᵢ - ŷᵢ)²
X, y = load_housing_data()  # placeholder: any feature matrix X and target y (e.g., sklearn's fetch_california_housing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(f"R² Score: {r2_score(y_test, y_pred):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
print(f"Coefficients: {model.coef_}")
Regularization:
from sklearn.linear_model import Ridge, Lasso, ElasticNet
# L2 regularization (Ridge): penalize w²
ridge = Ridge(alpha=1.0) # α = regularization strength
# L1 regularization (Lasso): penalize |w| → sparse solution
lasso = Lasso(alpha=0.1) # feature selection!
# ElasticNet: L1 + L2 combo
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)
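To see Lasso's "sparse solution" in practice, a minimal sketch on synthetic data (make_regression and the alpha values here are illustrative assumptions):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge, Lasso

X_demo, y_demo = make_regression(n_samples=200, n_features=20, n_informative=5,
                                 noise=10.0, random_state=42)
ridge = Ridge(alpha=1.0).fit(X_demo, y_demo)
lasso = Lasso(alpha=1.0).fit(X_demo, y_demo)

# Ridge shrinks weights but rarely zeroes them; Lasso drives many to exactly 0
print("Ridge zero coefficients:", np.sum(ridge.coef_ == 0))
print("Lasso zero coefficients:", np.sum(lasso.coef_ == 0))  # acts as feature selection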
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
# sigmoid(z) = 1/(1+e^(-z)) → P(y=1|x)
# Decision boundary: P > 0.5 → class 1
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
# Shows: precision, recall, f1-score, support for each class
cm = confusion_matrix(y_test, y_pred)
# [[TN, FP], [FN, TP]]
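The 0.5 boundary comes from predict_proba, so it can be moved when one error type is costlier; a small sketch assuming a binary target (the 0.3 threshold is illustrative):
y_prob = clf.predict_proba(X_test)[:, 1]        # P(y=1|x) from the sigmoid
y_pred_default = (y_prob > 0.5).astype(int)     # matches clf.predict for a binary problem
y_pred_recall  = (y_prob > 0.3).astype(int)     # lower threshold → higher recall, lower precision
print(confusion_matrix(y_test, y_pred_recall))  # [[TN, FP], [FN, TP]]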
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# Decision Tree: splits on best feature (Gini/Entropy)
dt = DecisionTreeClassifier(max_depth=5, min_samples_leaf=10)
dt.fit(X_train, y_train)
# Random Forest: bagging of decision trees
rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf.fit(X_train, y_train)
print(f"Feature importances: {rf.feature_importances_}")
# Gradient Boosting (XGBoost style)
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
gb.fit(X_train, y_train)
from sklearn.svm import SVC
# Finds hyperplane with maximum margin
# Kernel trick: project to higher dimension
svm = SVC(kernel='rbf', C=1.0, gamma='scale')
# kernel: 'linear', 'rbf', 'poly', 'sigmoid'
# C: regularization (smaller → smoother boundary)
# gamma: RBF kernel coefficient (larger → narrower kernel, more complex boundary, risk of overfitting)
svm.fit(X_train, y_train)
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Scale data (important for distance-based)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Find optimal K using Elbow method
inertias = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    km.fit(X_scaled)
    inertias.append(km.inertia_)
# Fit final model
km = KMeans(n_clusters=3, random_state=42, n_init=10)
labels = km.fit_predict(X_scaled)
centers = km.cluster_centers_
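The elbow is read off the inertia curve by eye; silhouette score is a complementary check that is easier to automate (sketch below, the K range is illustrative):
from sklearn.metrics import silhouette_score

for k in range(2, 7):                            # silhouette needs at least 2 clusters
    labels_k = KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(X_scaled)
    print(k, silhouette_score(X_scaled, labels_k))  # closer to 1 → better-separated clusters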
from sklearn.decomposition import PCA
# Reduce high-dimensional data
pca = PCA(n_components=2) # or n_components=0.95 → 95% variance
X_reduced = pca.fit_transform(X_scaled)
print(f"Explained variance ratio: {pca.explained_variance_ratio_}")
print(f"Total variance explained: {pca.explained_variance_ratio_.sum():.3f}")
import torch
import torch.nn as nn
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super().__init__()
        layers = []
        prev = input_size
        for h in hidden_sizes:
            layers.extend([nn.Linear(prev, h), nn.ReLU(), nn.Dropout(0.3)])
            prev = h
        layers.append(nn.Linear(prev, output_size))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)
# Training
model = NeuralNet(784, [256, 128], 10) # MNIST: 28x28=784 → 10 digits
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
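The training loop below pulls batches from a `dataloader`, which the notes assume already exists; a minimal sketch building one from in-memory tensors (random stand-in data, just so the loop runs):
from torch.utils.data import TensorDataset, DataLoader

# Stand-in data: replace with real MNIST tensors
X_tensor = torch.randn(1000, 784)                # 1000 flattened 28x28 images
y_tensor = torch.randint(0, 10, (1000,))         # 1000 integer class labels
dataset = TensorDataset(X_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)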
for epoch in range(50):
    for X_batch, y_batch in dataloader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()   # backpropagation: compute gradients
        optimizer.step()  # gradient descent step: update weights
Activation Functions:
ReLU: f(x) = max(0,x) → most common hidden layers
Sigmoid: 1/(1+e^-x) → binary output (0-1)
Softmax: e^(xᵢ)/Σⱼ e^(xⱼ) → multiclass output (probabilities)
Tanh: (eˣ-e⁻ˣ)/(eˣ+e⁻ˣ) → RNN hidden states
Leaky ReLU: max(0.01x, x) → fixes dying ReLU
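All of these are available via torch / torch.nn.functional; a tiny sketch applying each to the same tensor:
import torch
import torch.nn.functional as F

z = torch.tensor([-2.0, -0.5, 0.0, 1.5, 3.0])
print(F.relu(z))                    # negatives clipped to 0
print(torch.sigmoid(z))             # squashed into (0, 1)
print(F.softmax(z, dim=0))          # sums to 1 → class probabilities
print(torch.tanh(z))                # squashed into (-1, 1)
print(F.leaky_relu(z, 0.01))        # small slope for negatives instead of 0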
import torch.nn as nn
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),   # 3 channels → 32 channels
            nn.ReLU(),
            nn.MaxPool2d(2),                              # 224 → 112
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),                              # 112 → 56
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),                              # 56 → 28
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),  # 10 classes
        )

    def forward(self, x):
        return self.classifier(self.features(x))
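A quick sanity check of the shape comments (224 → 112 → 56 → 28) by pushing a dummy batch through the model:
cnn = CNN()
dummy = torch.randn(1, 3, 224, 224)   # (batch, channels, height, width)
feat = cnn.features(dummy)
print(feat.shape)                     # torch.Size([1, 128, 28, 28])
print(cnn(dummy).shape)               # torch.Size([1, 10]) → class scores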
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
f1_score, roc_auc_score, roc_curve)
from sklearn.model_selection import cross_val_score, KFold
# Classification metrics
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted')
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
y_prob = clf.predict_proba(X_test)         # probability estimates needed for ROC-AUC
auc = roc_auc_score(y_test, y_prob[:, 1])  # binary only: probability of the positive class
print(f"Acc:{acc:.3f} Prec:{prec:.3f} Rec:{rec:.3f} F1:{f1:.3f} AUC:{auc:.3f}")
# Cross validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(clf, X, y, cv=kf, scoring='f1_weighted')  # clf: any sklearn estimator
print(f"CV F1: {scores.mean():.3f} ± {scores.std():.3f}")
# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto', 0.1]}
grid = GridSearchCV(SVC(), param_grid, cv=5, scoring='f1_weighted')
grid.fit(X_train, y_train)
print(f"Best params: {grid.best_params_}")
Q: What is the difference between Bagging and Boosting? A: Bagging: parallel, independent models whose predictions are averaged/voted (Random Forest). Reduces variance. Boosting: sequential; each model corrects the previous one's errors (AdaBoost, XGBoost, LightGBM). Reduces bias. Boosting generally gives better accuracy but is more prone to overfitting.
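Both ideas are available in sklearn.ensemble; a sketch comparing them on the same split (the base tree depth and estimator counts are illustrative):
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging: 100 trees trained independently on bootstrap samples, then voted
bag = BaggingClassifier(DecisionTreeClassifier(max_depth=5), n_estimators=100, random_state=42)
# Boosting: weak learners added one by one, each focusing on the previous mistakes
boost = AdaBoostClassifier(n_estimators=100, random_state=42)

for name, m in [("Bagging", bag), ("Boosting", boost)]:
    m.fit(X_train, y_train)
    print(name, m.score(X_test, y_test))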
Q: What is the Transformer architecture? A: Self-attention mechanism — the elements of a sequence compute relationships among themselves. Encoder-decoder structure. Positional encoding (no recurrence). BERT, GPT, and T5 are all Transformer based. A revolution in NLP.
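The core of self-attention is scaled dot-product attention over the whole sequence; a minimal single-head sketch with toy dimensions (no masking, random matrices standing in for learned projections):
import torch
import torch.nn.functional as F

seq_len, d_model = 5, 16
x = torch.randn(seq_len, d_model)            # one toy sequence of 5 token embeddings

Wq, Wk, Wv = (torch.randn(d_model, d_model) for _ in range(3))
Q, K, V = x @ Wq, x @ Wk, x @ Wv

# Every position attends to every other position: score matrix is (5 x 5)
scores = Q @ K.T / (d_model ** 0.5)
attn = F.softmax(scores, dim=-1)             # each row sums to 1
out = attn @ V                               # context-aware representations, shape (5, 16)
print(attn.shape, out.shape)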
Q: What is feature engineering? A: Creating meaningful features from raw data using domain knowledge. One-hot encoding (categorical), log transform (skewed distributions), polynomial features, interaction terms, extracting date parts. Often more impactful than model selection.
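A small pandas/scikit-learn sketch of the transformations listed above (the column names and values are made up for illustration):
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

df = pd.DataFrame({"city": ["A", "B", "A"],        # categorical
                   "price": [100, 2500, 90],       # skewed numeric
                   "date": pd.to_datetime(["2024-01-05", "2024-06-20", "2024-12-31"])})

df = pd.get_dummies(df, columns=["city"])          # one-hot encoding
df["log_price"] = np.log1p(df["price"])            # log transform for skew
df["month"] = df["date"].dt.month                  # date-part extraction
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_feats = poly.fit_transform(df[["price", "log_price"]])  # squares + interaction term
print(df.head(), poly_feats.shape)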
Q: Supervised vs unsupervised learning? A: Supervised: trained on labeled data (input-output pairs) — classification, regression. Unsupervised: unlabeled data, find patterns — clustering (K-means), dimensionality reduction (PCA). Semi-supervised: only a few labels.
Q: What is overfitting and how do you prevent it? A: The model learns the training data very well but performs poorly on test data — it does not generalize. Prevention: more data, regularization (L1/L2), dropout, early stopping, cross-validation, a simpler model.
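One of the listed techniques, early stopping, is built into sklearn's GradientBoostingClassifier; a minimal sketch (the parameter values are illustrative):
from sklearn.ensemble import GradientBoostingClassifier

gb_es = GradientBoostingClassifier(n_estimators=1000,        # upper limit on boosting rounds
                                   validation_fraction=0.1,  # hold out 10% for monitoring
                                   n_iter_no_change=10,      # stop if no improvement for 10 rounds
                                   random_state=42)
gb_es.fit(X_train, y_train)
print("Trees actually trained:", gb_es.n_estimators_)        # usually far fewer than 1000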
Q: What is gradient descent? A: An iterative algorithm to minimize the loss function: θ = θ − α∇L(θ), where α is the learning rate. Variants: Batch GD (all data), SGD (one sample), Mini-batch GD (small batches). Adam and RMSProp are adaptive learning-rate optimizers.
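A minimal NumPy sketch of the update θ = θ − α∇L(θ) for straight-line regression with MSE loss (synthetic data with true w = 3, b = 2):
import numpy as np

rng = np.random.default_rng(42)
x = rng.uniform(0, 1, 100)
y = 3 * x + 2 + rng.normal(0, 0.1, 100)          # true w=3, b=2 plus noise

w, b, alpha = 0.0, 0.0, 0.1                      # parameters and learning rate
for _ in range(1000):
    y_hat = w * x + b
    grad_w = (2 / len(x)) * np.sum((y_hat - y) * x)  # ∂MSE/∂w
    grad_b = (2 / len(x)) * np.sum(y_hat - y)        # ∂MSE/∂b
    w -= alpha * grad_w                              # θ = θ − α∇L(θ)
    b -= alpha * grad_b
print(w, b)                                      # ≈ 3 and ≈ 2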
Q: Precision vs recall? A: Precision = TP/(TP+FP): of the predicted positives, how many are actually positive. Recall = TP/(TP+FN): of the actual positives, how many were detected. Spam filter: high precision (avoid false positives). Cancer screening: high recall (avoid missed cases).
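A tiny worked example with hand-made labels to see how the two formulas diverge:
from sklearn.metrics import precision_score, recall_score

y_true = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
y_hat  = [1, 1, 0, 0, 1, 0, 0, 0, 0, 0]   # TP=2, FN=2, FP=1, TN=5

print(precision_score(y_true, y_hat))     # 2/(2+1) ≈ 0.67
print(recall_score(y_true, y_hat))        # 2/(2+2) = 0.50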
Q: Why are CNNs used for images? A: Convolutional layers detect local patterns — edges, textures, shapes. Weight sharing — the same filter is applied across the whole image (translation invariance). Pooling — spatial downsampling. Fully connected layers — classification.