"""
07_glm.py

Demonstrates Generalized Linear Models (GLM):
- Logistic regression from scratch
- Poisson regression
- Link functions
- Deviance
- AIC/BIC model comparison
"""
11
import numpy as np
from scipy import stats
from scipy.optimize import minimize

# matplotlib is optional: the demos still run (text output only) without it.
try:
    import matplotlib.pyplot as plt
    HAS_PLT = True
except ImportError:
    HAS_PLT = False
    print("matplotlib not available; skipping plots\n")
22
23
def print_section(title):
    """Display a section banner: a blank line, then the title framed by '=' rules."""
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
29
30
def logistic_regression_scratch():
    """Fit a binary logistic regression by direct maximum likelihood.

    Generates synthetic data from a known logit model, estimates the
    coefficients with BFGS on the negative log-likelihood, then reports
    accuracy, the confusion matrix, and McFadden's pseudo-R². Optionally
    plots the decision boundary when matplotlib is available.
    """
    print_section("1. Logistic Regression (From Scratch)")

    # Generate binary classification data
    np.random.seed(42)
    n = 200
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    # True coefficients: intercept, slope on x1, slope on x2
    true_beta = np.array([-0.5, 1.5, -1.0])
    z = true_beta[0] + true_beta[1] * x1 + true_beta[2] * x2
    prob = 1 / (1 + np.exp(-z))
    y = (np.random.uniform(0, 1, n) < prob).astype(int)

    print(f"Generated data: n = {n}")
    print(f"True coefficients: {true_beta}")
    print(f"Class balance: {np.sum(y)}/{n} positive")

    # Design matrix with an explicit intercept column
    X = np.column_stack([np.ones(n), x1, x2])

    def sigmoid(z):
        """Numerically stable logistic function (inputs clipped to ±500)."""
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    def neg_log_likelihood(beta):
        """Negative Bernoulli log-likelihood of the data under `beta`."""
        p = sigmoid(X @ beta)
        # Clip probabilities away from 0/1 to avoid log(0)
        eps = 1e-15
        p = np.clip(p, eps, 1 - eps)
        return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

    # Optimize with BFGS (quasi-Newton) from a zero start
    beta_init = np.zeros(3)
    result = minimize(neg_log_likelihood, beta_init, method='BFGS')
    beta_hat = result.x

    print("\nEstimated coefficients:")
    print(f"  β₀ (intercept): {beta_hat[0]:.4f}")
    print(f"  β₁ (x1): {beta_hat[1]:.4f}")
    print(f"  β₂ (x2): {beta_hat[2]:.4f}")

    # Predictions at the conventional 0.5 threshold
    prob_hat = sigmoid(X @ beta_hat)
    y_pred = (prob_hat > 0.5).astype(int)

    # Accuracy
    accuracy = np.mean(y == y_pred)
    print(f"\nAccuracy: {accuracy:.4f}")

    # Confusion matrix counts
    tp = np.sum((y == 1) & (y_pred == 1))
    tn = np.sum((y == 0) & (y_pred == 0))
    fp = np.sum((y == 0) & (y_pred == 1))
    fn = np.sum((y == 1) & (y_pred == 0))

    print("\nConfusion matrix:")
    print("            Predicted")
    print("             0    1")
    print(f"  Actual 0 {tn:3d}  {fp:3d}")
    print(f"         1 {fn:3d}  {tp:3d}")

    # Log-likelihood at the MLE
    log_lik = -neg_log_likelihood(beta_hat)
    print(f"\nLog-likelihood: {log_lik:.2f}")

    # Null model (intercept only): constant p = sample proportion of 1s
    p_null = np.mean(y)
    log_lik_null = np.sum(y * np.log(p_null + 1e-15) + (1 - y) * np.log(1 - p_null + 1e-15))

    # McFadden's pseudo-R² = 1 - ll_model / ll_null
    r2_mcfadden = 1 - log_lik / log_lik_null
    print(f"McFadden's R²: {r2_mcfadden:.4f}")

    if HAS_PLT:
        # Decision boundary: evaluate P(y=1) over a grid of the two predictors
        x1_min, x1_max = x1.min() - 1, x1.max() + 1
        x2_min, x2_max = x2.min() - 1, x2.max() + 1
        xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 100),
                               np.linspace(x2_min, x2_max, 100))
        X_grid = np.column_stack([np.ones(xx1.size), xx1.ravel(), xx2.ravel()])
        Z = sigmoid(X_grid @ beta_hat).reshape(xx1.shape)

        plt.figure(figsize=(10, 6))
        plt.contourf(xx1, xx2, Z, levels=20, alpha=0.6, cmap='RdYlBu_r')
        plt.colorbar(label='P(y=1)')
        # Black contour at p = 0.5 marks the decision boundary
        plt.contour(xx1, xx2, Z, levels=[0.5], colors='black', linewidths=2)
        plt.scatter(x1[y == 0], x2[y == 0], c='blue', marker='o', alpha=0.6, label='Class 0')
        plt.scatter(x1[y == 1], x2[y == 1], c='red', marker='s', alpha=0.6, label='Class 1')
        plt.xlabel('x₁')
        plt.ylabel('x₂')
        plt.title('Logistic Regression Decision Boundary')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.savefig('/tmp/logistic_regression.png', dpi=100)
        print("\n[Plot saved to /tmp/logistic_regression.png]")
        plt.close()
134
135
def poisson_regression():
    """Fit a Poisson regression (log link) by direct maximum likelihood.

    Generates synthetic counts from a known log-linear model, estimates the
    coefficients with BFGS, then reports the fitted rates, the residual and
    null deviances, and a deviance-based pseudo-R².
    """
    print_section("2. Poisson Regression")

    # Generate count data
    np.random.seed(123)
    n = 150
    x = np.random.uniform(0, 5, n)

    # True model: log(λ) = β₀ + β₁*x
    true_beta = np.array([0.5, 0.4])
    log_lambda = true_beta[0] + true_beta[1] * x
    lambda_true = np.exp(log_lambda)
    y = np.random.poisson(lambda_true)

    print(f"Generated Poisson data: n = {n}")
    print(f"True coefficients: {true_beta}")
    print(f"Response range: [{y.min()}, {y.max()}]")

    # Design matrix with an explicit intercept column
    X = np.column_stack([np.ones(n), x])

    def neg_log_likelihood(beta):
        """Negative Poisson log-likelihood under the log link.

        The full log-likelihood is y*log(λ) - λ - log(y!); log(y!) does not
        depend on beta, so it is dropped for optimization.
        """
        log_lambda = X @ beta
        lambda_pred = np.exp(log_lambda)
        return -np.sum(y * log_lambda - lambda_pred)

    # Optimize with BFGS from a zero start
    beta_init = np.zeros(2)
    result = minimize(neg_log_likelihood, beta_init, method='BFGS')
    beta_hat = result.x

    print("\nEstimated coefficients:")
    print(f"  β₀ (intercept): {beta_hat[0]:.4f}")
    print(f"  β₁ (slope): {beta_hat[1]:.4f}")

    # Fitted rates λ̂_i = exp(x_iᵀ β̂)
    lambda_hat = np.exp(X @ beta_hat)

    print(f"\nPredicted λ range: [{lambda_hat.min():.2f}, {lambda_hat.max():.2f}]")

    # Deviance = 2 * Σ [y*log(y/λ̂) - (y - λ̂)], measured against the
    # saturated model (λ_i = y_i). When y == 0 the y*log(y/λ̂) term is
    # exactly 0, so substituting 1 inside the log (y_safe) leaves the
    # sum unchanged while avoiding log(0).
    y_safe = np.where(y == 0, 1, y)

    deviance = 2 * np.sum(y * np.log(y_safe / lambda_hat) - (y - lambda_hat))

    # Null deviance uses the intercept-only fit λ = mean(y)
    lambda_null = np.mean(y)
    deviance_null = 2 * np.sum(y * np.log(y_safe / lambda_null) - (y - lambda_null))

    print(f"\nDeviance: {deviance:.2f}")
    print(f"Null deviance: {deviance_null:.2f}")

    # Deviance-based pseudo-R²
    r2 = 1 - deviance / deviance_null
    print(f"Pseudo R²: {r2:.4f}")

    if HAS_PLT:
        # Fitted mean curve over the predictor's range
        x_plot = np.linspace(0, 5, 100)
        X_plot = np.column_stack([np.ones(len(x_plot)), x_plot])
        lambda_plot = np.exp(X_plot @ beta_hat)

        plt.figure(figsize=(10, 6))
        plt.scatter(x, y, alpha=0.6, label='Observed counts')
        plt.plot(x_plot, lambda_plot, 'r-', linewidth=2, label='Fitted λ(x)')
        plt.xlabel('x')
        plt.ylabel('Count')
        plt.title('Poisson Regression')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.savefig('/tmp/poisson_regression.png', dpi=100)
        print("\n[Plot saved to /tmp/poisson_regression.png]")
        plt.close()
215
216
def link_functions_demo():
    """Survey the common GLM link functions and plot their inverse mappings."""
    print_section("3. Link Functions")

    print("Common link functions in GLMs:\n")

    eta = np.linspace(-5, 5, 200)

    # Each entry: (printed description lines, inverse-link curve over eta,
    #              plot format/color, subplot title, y-limits or None)
    links = [
        (("1. Logit link: g(μ) = log(μ/(1-μ))",
          "   Inverse: μ = 1/(1+exp(-η))",
          "   Used in: Logistic regression\n"),
         1 / (1 + np.exp(-eta)), 'b-', 'Logit: μ = 1/(1+exp(-η))', [-0.1, 1.1]),
        (("2. Probit link: g(μ) = Φ⁻¹(μ)",
          "   Inverse: μ = Φ(η)",
          "   Used in: Probit regression\n"),
         stats.norm.cdf(eta), 'g-', 'Probit: μ = Φ(η)', [-0.1, 1.1]),
        (("3. Log link: g(μ) = log(μ)",
          "   Inverse: μ = exp(η)",
          "   Used in: Poisson regression\n"),
         np.exp(eta), 'r-', 'Log: μ = exp(η)', [0, 20]),
        (("4. Identity link: g(μ) = μ",
          "   Inverse: μ = η",
          "   Used in: Linear regression\n"),
         eta, 'purple', 'Identity: μ = η', None),
    ]

    for description, _, _, _, _ in links:
        for line in description:
            print(line)

    if HAS_PLT:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))

        # axes.flat iterates row-major: (0,0), (0,1), (1,0), (1,1)
        for ax, (_, curve, fmt, title, ylim) in zip(axes.flat, links):
            ax.plot(eta, curve, fmt, linewidth=2)
            ax.set_xlabel('η (linear predictor)')
            ax.set_ylabel('μ (mean)')
            ax.set_title(title)
            ax.grid(True, alpha=0.3)
            if ylim is not None:
                ax.set_ylim(ylim)

        plt.tight_layout()
        plt.savefig('/tmp/link_functions.png', dpi=100)
        print("[Plot saved to /tmp/link_functions.png]")
        plt.close()
287
288
def deviance_analysis():
    """Compare nested Poisson regressions via deviance, AIC/BIC, and a LR test.

    Fits three nested models (intercept-only, x1, x1+x2) to synthetic
    Poisson data, tabulates deviance and information criteria, then runs
    the likelihood ratio test of the null model against the full model.
    """
    print_section("4. Deviance Analysis")

    # Generate Poisson data driven by two predictors
    np.random.seed(456)
    n = 100
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    true_beta = np.array([0.5, 1.2, -0.8])
    log_lambda = true_beta[0] + true_beta[1] * x1 + true_beta[2] * x2
    y = np.random.poisson(np.exp(log_lambda))

    print("Comparing nested Poisson regression models")

    # Nested design matrices: intercept ⊂ {x1} ⊂ {x1, x2}
    X1 = np.ones((n, 1))
    X2 = np.column_stack([np.ones(n), x1])
    X3 = np.column_stack([np.ones(n), x1, x2])

    models = [
        ("Null (intercept only)", X1),
        ("x1", X2),
        ("x1 + x2", X3)
    ]

    results = []

    for name, X in models:
        # Negative Poisson log-likelihood (constant log(y!) dropped).
        # X is bound as a default argument so the closure does not suffer
        # the late-binding-in-a-loop pitfall.
        def neg_ll(beta, X=X):
            log_lambda = X @ beta
            return -np.sum(y * log_lambda - np.exp(log_lambda))

        beta_init = np.zeros(X.shape[1])
        result = minimize(neg_ll, beta_init, method='BFGS')
        beta_hat = result.x

        # Deviance vs. the saturated model (λ_i = y_i); the y_safe
        # substitution is harmless because y*log(y/λ̂) is 0 when y == 0.
        lambda_hat = np.exp(X @ beta_hat)
        y_safe = np.where(y == 0, 1, y)
        deviance = 2 * np.sum(y * np.log(y_safe / lambda_hat) - (y - lambda_hat))

        # Information criteria from the maximized log-likelihood
        k = X.shape[1]  # number of parameters
        log_lik = -neg_ll(beta_hat)
        aic = 2 * k - 2 * log_lik
        bic = k * np.log(n) - 2 * log_lik

        results.append({
            'name': name,
            'k': k,
            'deviance': deviance,
            'aic': aic,
            'bic': bic,
            'log_lik': log_lik
        })

    print("\nModel comparison:")
    print(f"{'Model':<20} {'k':>3} {'Deviance':>12} {'AIC':>12} {'BIC':>12}")
    print("-" * 70)

    for r in results:
        print(f"{r['name']:<20} {r['k']:>3} {r['deviance']:>12.2f} {r['aic']:>12.2f} {r['bic']:>12.2f}")

    # Likelihood ratio test (Null vs Full): -2*Δlog-lik ~ χ² with df = Δk
    lr_statistic = -2 * (results[0]['log_lik'] - results[2]['log_lik'])
    df = results[2]['k'] - results[0]['k']
    p_value = 1 - stats.chi2.cdf(lr_statistic, df)

    print("\nLikelihood ratio test (Null vs Full):")
    print(f"  LR statistic: {lr_statistic:.4f}")
    print(f"  df: {df}")
    print(f"  p-value: {p_value:.6f}")

    if p_value < 0.05:
        print("  Full model significantly better (p < 0.05)")

    # Best model by AIC (lower is better)
    best_aic_idx = np.argmin([r['aic'] for r in results])
    print(f"\nBest model by AIC: {results[best_aic_idx]['name']}")
376
377
def model_comparison_aic_bic():
    """Demonstrate AIC/BIC model selection for logistic regressions.

    Fits four nested candidate models — two relevant predictors (x1, x2)
    plus up to two pure-noise ones (x3, x4) — and shows that both criteria
    favor the true model.
    """
    print_section("5. AIC/BIC Model Comparison")

    # Generate data with specific structure
    np.random.seed(789)
    n = 200
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)
    x3 = np.random.normal(0, 1, n)
    x4 = np.random.normal(0, 1, n)  # Irrelevant (pure noise)

    # True model: only x1 and x2 matter
    z = -1 + 1.5*x1 + 0.8*x2
    p = 1 / (1 + np.exp(-z))
    y = (np.random.uniform(0, 1, n) < p).astype(int)

    print("True model: logit(p) = -1 + 1.5*x1 + 0.8*x2")
    print("x3, x4 are irrelevant")

    # Candidate models (nested, each adding one predictor)
    models = [
        ("x1", np.column_stack([np.ones(n), x1])),
        ("x1 + x2", np.column_stack([np.ones(n), x1, x2])),
        ("x1 + x2 + x3", np.column_stack([np.ones(n), x1, x2, x3])),
        ("x1 + x2 + x3 + x4", np.column_stack([np.ones(n), x1, x2, x3, x4]))
    ]

    results = []

    def sigmoid(z):
        """Numerically stable logistic function (inputs clipped to ±500)."""
        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))

    for name, X in models:
        # Negative Bernoulli log-likelihood; X bound as a default argument
        # to avoid the late-binding-closure-in-a-loop pitfall.
        def neg_ll(beta, X=X):
            p = sigmoid(X @ beta)
            p = np.clip(p, 1e-15, 1 - 1e-15)
            return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

        beta_init = np.zeros(X.shape[1])
        result = minimize(neg_ll, beta_init, method='BFGS')

        k = X.shape[1]
        log_lik = -neg_ll(result.x)
        aic = 2 * k - 2 * log_lik          # AIC = 2k - 2 ln(L)
        bic = k * np.log(n) - 2 * log_lik  # BIC = k ln(n) - 2 ln(L)

        results.append({'name': name, 'k': k, 'aic': aic, 'bic': bic})

    print(f"\n{'Model':<20} {'k':>3} {'AIC':>12} {'BIC':>12}")
    print("-" * 50)

    for r in results:
        print(f"{r['name']:<20} {r['k']:>3} {r['aic']:>12.2f} {r['bic']:>12.2f}")

    best_aic_idx = np.argmin([r['aic'] for r in results])
    best_bic_idx = np.argmin([r['bic'] for r in results])

    print(f"\nBest model by AIC: {results[best_aic_idx]['name']}")
    print(f"Best model by BIC: {results[best_bic_idx]['name']}")

    print("\nInterpretation:")
    print("  AIC prefers predictive accuracy")
    print("  BIC penalizes complexity more (prefers simpler models)")
    print("  Both should select x1+x2 (true model)")
444
445
def main():
    """Run every GLM demonstration in sequence."""
    banner = "=" * 70
    print(banner)
    print(" GENERALIZED LINEAR MODELS DEMONSTRATIONS")
    print(banner)

    np.random.seed(42)

    demos = (
        logistic_regression_scratch,
        poisson_regression,
        link_functions_demo,
        deviance_analysis,
        model_comparison_aic_bic,
    )
    for demo in demos:
        demo()

    print("\n" + banner)
    print(" All demonstrations completed successfully!")
    print(banner)
463
464
# Script entry point: run all demos only when executed directly, not on import.
if __name__ == "__main__":
    main()