1"""
2Norms, Distance Metrics, and Regularization
3
4Demonstrates:
5- Lp norms visualization (L1, L2, L∞)
6- Distance metrics (Euclidean, Manhattan, Cosine, Mahalanobis)
7- L1 vs L2 regularization effects on linear regression
8- Sparsity demonstration
9- ML applications: feature scaling, regularization
10
11Dependencies: numpy, matplotlib, sklearn
12"""
13
14import numpy as np
15import matplotlib.pyplot as plt
16from sklearn.linear_model import Ridge, Lasso, LinearRegression
17from sklearn.preprocessing import StandardScaler
18from sklearn.metrics.pairwise import cosine_similarity
19
20
def demonstrate_lp_norms():
    """Show L1, L2 and L-infinity norms on a sample vector and verify the norm axioms."""
    banner = "=" * 60
    print(banner)
    print("Lp NORMS")
    print(banner)

    print("\nLp norm: ||x||_p = (Σ|x_i|^p)^(1/p)")

    x = np.array([3, -4, 0, 2])

    # Each norm is computed twice: via numpy and via its defining formula.
    l1_norm = np.linalg.norm(x, ord=1)
    l1_manual = np.abs(x).sum()

    l2_norm = np.linalg.norm(x, ord=2)
    l2_manual = np.sqrt((x ** 2).sum())

    linf_norm = np.linalg.norm(x, ord=np.inf)
    linf_manual = np.abs(x).max()

    print(f"\nVector x = {x}")
    print(f"\nL1 norm (Manhattan): ||x||_1 = {l1_norm:.4f} (manual: {l1_manual:.4f})")
    print(f"L2 norm (Euclidean): ||x||_2 = {l2_norm:.4f} (manual: {l2_manual:.4f})")
    print(f"L∞ norm (Maximum): ||x||_∞ = {linf_norm:.4f} (manual: {linf_manual:.4f})")

    # The four defining properties of any vector norm.
    print("\n--- Norm Properties ---")
    properties = (
        "1. Non-negativity: ||x|| ≥ 0",
        "2. Definiteness: ||x|| = 0 iff x = 0",
        "3. Homogeneity: ||αx|| = |α| · ||x||",
        "4. Triangle inequality: ||x + y|| ≤ ||x|| + ||y||",
    )
    for prop in properties:
        print(prop)

    # Empirically confirm the triangle inequality for the L2 norm.
    y = np.array([1, 2, -1, 3])
    lhs = np.linalg.norm(x + y, ord=2)
    rhs = np.linalg.norm(x, ord=2) + np.linalg.norm(y, ord=2)
    print("\nTriangle inequality verification (L2):")
    print(f"||x + y|| = {lhs:.4f}")
    print(f"||x|| + ||y|| = {rhs:.4f}")
    print(f"||x + y|| ≤ ||x|| + ||y||: {lhs <= rhs}")
63
64
def visualize_unit_balls(output_path='/opt/projects/01_Personal/03_Study/examples/Math_for_AI/unit_balls.png'):
    """Plot the 2-D unit balls of the L∞, L2 and L1 norms side by side.

    Parameters
    ----------
    output_path : str
        Where the PNG figure is written. Defaults to the previously
        hard-coded location for backward compatibility; pass a local
        path to run the script on other machines.
    """
    print("\n" + "=" * 60)
    print("VISUALIZING UNIT BALLS")
    print("=" * 60)

    print("\nUnit ball: {x : ||x||_p ≤ 1}")

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Angle samples used to trace the circular L2 boundary.
    theta = np.linspace(0, 2 * np.pi, 1000)

    # (boundary x, boundary y, line style, title) for each norm's unit ball.
    balls = [
        # L∞ norm: square
        (np.array([1, 1, -1, -1, 1]), np.array([1, -1, -1, 1, 1]),
         'b-', 'L∞ Norm (||x||∞ ≤ 1)'),
        # L2 norm: circle
        (np.cos(theta), np.sin(theta),
         'g-', 'L2 Norm (||x||₂ ≤ 1)'),
        # L1 norm: diamond
        (np.array([1, 0, -1, 0, 1]), np.array([0, 1, 0, -1, 0]),
         'r-', 'L1 Norm (||x||₁ ≤ 1)'),
    ]

    for ax, (bx, by, style, title) in zip(axes, balls):
        ax.plot(bx, by, style, linewidth=2)
        ax.fill(bx, by, alpha=0.3)
        ax.set_xlim(-1.5, 1.5)
        ax.set_ylim(-1.5, 1.5)
        ax.set_aspect('equal')
        ax.grid(True, alpha=0.3)
        ax.set_title(title)
        ax.set_xlabel('x₁')
        ax.set_ylabel('x₂')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150)
    print(f"Unit ball visualization saved to {output_path}")
    plt.close()
124
125
def distance_metrics():
    """Walk through Euclidean, Manhattan, Chebyshev, cosine and Mahalanobis distances."""
    print("\n" + "=" * 60)
    print("DISTANCE METRICS")
    print("=" * 60)

    x = np.array([1, 2, 3])
    y = np.array([4, 5, 6])

    print(f"Vector x: {x}")
    print(f"Vector y: {y}")

    # The three Minkowski-style distances all operate on the difference vector.
    diff = x - y

    # Euclidean distance (L2)
    euclidean = np.sqrt(np.sum(diff ** 2))
    print(f"\n1. Euclidean distance (L2): {euclidean:.4f}")
    print(f" d(x, y) = ||x - y||₂ = sqrt(Σ(x_i - y_i)²)")

    # Manhattan distance (L1)
    manhattan = np.sum(np.abs(diff))
    print(f"\n2. Manhattan distance (L1): {manhattan:.4f}")
    print(f" d(x, y) = ||x - y||₁ = Σ|x_i - y_i|")

    # Chebyshev distance (L∞)
    chebyshev = np.max(np.abs(diff))
    print(f"\n3. Chebyshev distance (L∞): {chebyshev:.4f}")
    print(f" d(x, y) = ||x - y||∞ = max|x_i - y_i|")

    # Cosine similarity/distance
    cos_sim = (x @ y) / (np.linalg.norm(x) * np.linalg.norm(y))
    cos_dist = 1 - cos_sim
    print(f"\n4. Cosine similarity: {cos_sim:.4f}")
    print(f" cos(θ) = (x·y) / (||x|| ||y||)")
    print(f" Cosine distance: {cos_dist:.4f}")

    # Mahalanobis distance
    print("\n5. Mahalanobis distance:")
    print(" Accounts for covariance structure")

    # Draw correlated samples so the empirical covariance is non-diagonal.
    np.random.seed(42)
    samples = np.random.multivariate_normal([0, 0], [[2, 1], [1, 2]], size=100)

    # Invert the sample covariance for the Mahalanobis quadratic form.
    inv_cov = np.linalg.inv(np.cov(samples.T))

    p1 = np.array([1, 1])
    p2 = np.array([2, 2])

    delta = p1 - p2
    mahal_dist = np.sqrt(delta.T @ inv_cov @ delta)

    print(f" Point 1: {p1}")
    print(f" Point 2: {p2}")
    print(f" Mahalanobis distance: {mahal_dist:.4f}")
    print(f" Euclidean distance: {np.linalg.norm(p1 - p2):.4f}")
186
187
def regularization_comparison():
    """Fit OLS, Ridge and Lasso on sparse synthetic data and compare their coefficients.

    Returns (X, y, lr, ridge, lasso, true_coef) so callers can visualize the fits.
    """
    print("\n" + "=" * 60)
    print("L1 vs L2 REGULARIZATION")
    print("=" * 60)

    # Synthetic regression problem: 50 features, but only the first 5 matter.
    np.random.seed(42)
    n_samples, n_features = 100, 50

    X = np.random.randn(n_samples, n_features)
    true_coef = np.zeros(n_features)
    true_coef[:5] = [3.0, -2.0, 1.5, -1.0, 2.5]

    y = X @ true_coef + np.random.randn(n_samples) * 0.5

    print(f"\nDataset:")
    print(f"Samples: {n_samples}, Features: {n_features}")
    print(f"True non-zero coefficients: 5")

    # Unregularized baseline plus L2- and L1-penalized fits.
    lr = LinearRegression().fit(X, y)
    ridge = Ridge(alpha=1.0).fit(X, y)
    lasso = Lasso(alpha=0.1).fit(X, y)

    labeled_models = (
        ("Linear Regression", lr),
        ("Ridge (L2)", ridge),
        ("Lasso (L1)", lasso),
    )

    print("\n--- Coefficient Analysis ---")
    for label, model in labeled_models:
        print(f"{label} - Non-zero coefs: {np.sum(np.abs(model.coef_) > 0.01)}")

    print(f"\nL2 norm of coefficients:")
    for label, model in labeled_models:
        print(f"{label}: {np.linalg.norm(model.coef_, ord=2):.4f}")

    print(f"\nL1 norm of coefficients:")
    for label, model in labeled_models:
        print(f"{label}: {np.linalg.norm(model.coef_, ord=1):.4f}")

    print("\nKey insights:")
    print("- L1 (Lasso) produces sparse solutions (many exact zeros)")
    print("- L2 (Ridge) shrinks coefficients but rarely to exactly zero")
    print("- L1 performs feature selection, L2 performs feature shrinkage")

    return X, y, lr, ridge, lasso, true_coef
243
244
def visualize_regularization(X, y, lr, ridge, lasso, true_coef,
                             output_path='/opt/projects/01_Personal/03_Study/examples/Math_for_AI/regularization_comparison.png'):
    """Visualize regularization effects: coefficients, sparsity, and the Lasso path.

    Parameters
    ----------
    X, y : training data, re-used to refit Lasso along the regularization path.
    lr, ridge, lasso : fitted estimators whose ``coef_`` arrays are compared.
    true_coef : ground-truth coefficient vector used as the reference.
    output_path : str
        Where the PNG figure is written. Defaults to the previously
        hard-coded location for backward compatibility.
    """
    print("\n" + "=" * 60)
    print("VISUALIZING REGULARIZATION EFFECTS")
    print("=" * 60)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Plot 1: all coefficient values, true vs fitted
    ax = axes[0, 0]
    x_pos = np.arange(len(true_coef))

    ax.plot(x_pos, true_coef, 'ko-', label='True', linewidth=2, markersize=4)
    ax.plot(x_pos, lr.coef_, 'b.-', label='Linear Reg', alpha=0.7, markersize=3)
    ax.plot(x_pos, ridge.coef_, 'g.-', label='Ridge (L2)', alpha=0.7, markersize=3)
    ax.plot(x_pos, lasso.coef_, 'r.-', label='Lasso (L1)', alpha=0.7, markersize=3)

    ax.set_xlabel('Feature Index')
    ax.set_ylabel('Coefficient Value')
    ax.set_title('Coefficient Values Comparison')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='k', linestyle='-', linewidth=0.5)

    # Plot 2: grouped bars for the first 10 coefficients (zoomed view)
    ax = axes[0, 1]
    n_show = 10
    x_pos_zoom = np.arange(n_show)

    width = 0.2
    ax.bar(x_pos_zoom - 1.5*width, true_coef[:n_show], width, label='True', alpha=0.8)
    ax.bar(x_pos_zoom - 0.5*width, lr.coef_[:n_show], width, label='Linear Reg', alpha=0.8)
    ax.bar(x_pos_zoom + 0.5*width, ridge.coef_[:n_show], width, label='Ridge (L2)', alpha=0.8)
    ax.bar(x_pos_zoom + 1.5*width, lasso.coef_[:n_show], width, label='Lasso (L1)', alpha=0.8)

    ax.set_xlabel('Feature Index')
    ax.set_ylabel('Coefficient Value')
    ax.set_title(f'First {n_show} Coefficients (Zoomed)')
    ax.set_xticks(x_pos_zoom)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')
    ax.axhline(y=0, color='k', linestyle='-', linewidth=0.5)

    # Plot 3: count of non-zero coefficients per model (sparsity)
    ax = axes[1, 0]

    threshold = 0.01
    sparsity_data = np.array([
        np.sum(np.abs(true_coef) > threshold),
        np.sum(np.abs(lr.coef_) > threshold),
        np.sum(np.abs(ridge.coef_) > threshold),
        np.sum(np.abs(lasso.coef_) > threshold)
    ])

    bars = ax.bar(['True', 'Linear Reg', 'Ridge (L2)', 'Lasso (L1)'],
                  sparsity_data, color=['black', 'blue', 'green', 'red'], alpha=0.7)

    ax.set_ylabel('Number of Non-Zero Coefficients')
    ax.set_title(f'Sparsity Comparison (threshold = {threshold})')
    ax.grid(True, alpha=0.3, axis='y')

    # Annotate each bar with its count
    for bar, value in zip(bars, sparsity_data):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{int(value)}', ha='center', va='bottom')

    # Plot 4: Lasso regularization path.
    # NOTE: the original version also fit 50 Ridge models here but never
    # plotted them; that dead computation has been removed.
    ax = axes[1, 1]

    alphas = np.logspace(-3, 2, 50)
    coefs_lasso = np.array([Lasso(alpha=alpha).fit(X, y).coef_ for alpha in alphas])

    # Plot first 5 features only (the truly relevant ones)
    for i in range(5):
        ax.plot(alphas, coefs_lasso[:, i], '-', linewidth=2, label=f'Feature {i}')

    ax.set_xscale('log')
    ax.set_xlabel('Alpha (Regularization Strength)')
    ax.set_ylabel('Coefficient Value')
    ax.set_title('Lasso Regularization Path (First 5 Features)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.axhline(y=0, color='k', linestyle='-', linewidth=0.5)

    plt.tight_layout()
    plt.savefig(output_path, dpi=150)
    print(f"Regularization visualization saved to {output_path}")
    plt.close()
348
349
def demonstrate_sparsity():
    """Show on a 2-feature problem that the L1 penalty zeroes out irrelevant weights."""
    print("\n" + "=" * 60)
    print("SPARSITY WITH L1 REGULARIZATION")
    print("=" * 60)

    print("\nWhy L1 induces sparsity:")
    print("- L1 penalty: λΣ|w_i| has sharp corners at axes")
    print("- L2 penalty: λΣw_i² is smooth everywhere")
    print("- Optimization with L1 tends to hit corners (exact zeros)")

    print("\n--- 2D Example ---")

    # Two candidate features, but the target depends on the first one only.
    np.random.seed(42)
    n = 100
    feat_a = np.random.randn(n)
    feat_b = np.random.randn(n)
    target = 3 * feat_a + np.random.randn(n) * 0.5  # feat_b carries no signal

    X = np.stack([feat_a, feat_b], axis=1)

    # Fit the three models on the same design matrix.
    ols_fit = LinearRegression().fit(X, target)
    ridge_fit = Ridge(alpha=1.0).fit(X, target)
    lasso_fit = Lasso(alpha=0.1).fit(X, target)

    print(f"\nTrue relationship: y = 3*x1 + noise")
    print(f"\nLinear Regression coef: {ols_fit.coef_}")
    print(f"Ridge (L2) coef: {ridge_fit.coef_}")
    print(f"Lasso (L1) coef: {lasso_fit.coef_}")

    print(f"\nLasso correctly identifies x2 as irrelevant!")
    print(f"x2 coefficient ≈ {lasso_fit.coef_[1]:.6f} (effectively zero)")
385
386
if __name__ == "__main__":
    # Run every demonstration in sequence; the fitted models returned by
    # the comparison step feed directly into the visualization step.
    demonstrate_lp_norms()
    visualize_unit_balls()
    distance_metrics()

    reg_results = regularization_comparison()
    visualize_regularization(*reg_results)

    demonstrate_sparsity()

    banner = "=" * 60
    print("\n" + banner)
    print("All demonstrations completed!")
    print(banner)