"""
07_glm.py

Demonstrates Generalized Linear Models (GLM):
- Logistic regression from scratch
- Poisson regression
- Link functions
- Deviance
- AIC/BIC model comparison
"""
 11
 12import numpy as np
 13from scipy import stats
 14from scipy.optimize import minimize
 15
 16try:
 17    import matplotlib.pyplot as plt
 18    HAS_PLT = True
 19except ImportError:
 20    HAS_PLT = False
 21    print("matplotlib not available; skipping plots\n")
 22
 23
 24def print_section(title):
 25    """Print formatted section header."""
 26    print("\n" + "=" * 70)
 27    print(f"  {title}")
 28    print("=" * 70)
 29
 30
 31def logistic_regression_scratch():
 32    """Demonstrate logistic regression from scratch."""
 33    print_section("1. Logistic Regression (From Scratch)")
 34
 35    # Generate binary classification data
 36    np.random.seed(42)
 37    n = 200
 38    x1 = np.random.normal(0, 1, n)
 39    x2 = np.random.normal(0, 1, n)
 40
 41    # True coefficients
 42    true_beta = np.array([-0.5, 1.5, -1.0])
 43    z = true_beta[0] + true_beta[1] * x1 + true_beta[2] * x2
 44    prob = 1 / (1 + np.exp(-z))
 45    y = (np.random.uniform(0, 1, n) < prob).astype(int)
 46
 47    print(f"Generated data: n = {n}")
 48    print(f"True coefficients: {true_beta}")
 49    print(f"Class balance: {np.sum(y)}/{n} positive")
 50
 51    # Design matrix
 52    X = np.column_stack([np.ones(n), x1, x2])
 53
 54    # Logistic function
 55    def sigmoid(z):
 56        return 1 / (1 + np.exp(-np.clip(z, -500, 500)))
 57
 58    # Negative log-likelihood
 59    def neg_log_likelihood(beta):
 60        z = X @ beta
 61        p = sigmoid(z)
 62        # Add small epsilon to avoid log(0)
 63        eps = 1e-15
 64        p = np.clip(p, eps, 1 - eps)
 65        return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
 66
 67    # Optimize using scipy
 68    beta_init = np.zeros(3)
 69    result = minimize(neg_log_likelihood, beta_init, method='BFGS')
 70
 71    beta_hat = result.x
 72
 73    print(f"\nEstimated coefficients:")
 74    print(f"  β₀ (intercept): {beta_hat[0]:.4f}")
 75    print(f"  β₁ (x1): {beta_hat[1]:.4f}")
 76    print(f"  β₂ (x2): {beta_hat[2]:.4f}")
 77
 78    # Predictions
 79    z_hat = X @ beta_hat
 80    prob_hat = sigmoid(z_hat)
 81    y_pred = (prob_hat > 0.5).astype(int)
 82
 83    # Accuracy
 84    accuracy = np.mean(y == y_pred)
 85    print(f"\nAccuracy: {accuracy:.4f}")
 86
 87    # Confusion matrix
 88    tp = np.sum((y == 1) & (y_pred == 1))
 89    tn = np.sum((y == 0) & (y_pred == 0))
 90    fp = np.sum((y == 0) & (y_pred == 1))
 91    fn = np.sum((y == 1) & (y_pred == 0))
 92
 93    print(f"\nConfusion matrix:")
 94    print(f"              Predicted")
 95    print(f"              0    1")
 96    print(f"  Actual  0  {tn:3d}  {fp:3d}")
 97    print(f"          1  {fn:3d}  {tp:3d}")
 98
 99    # Log-likelihood at solution
100    log_lik = -neg_log_likelihood(beta_hat)
101    print(f"\nLog-likelihood: {log_lik:.2f}")
102
103    # Null model (intercept only)
104    p_null = np.mean(y)
105    log_lik_null = np.sum(y * np.log(p_null + 1e-15) + (1 - y) * np.log(1 - p_null + 1e-15))
106
107    # McFadden's R²
108    r2_mcfadden = 1 - log_lik / log_lik_null
109    print(f"McFadden's R²: {r2_mcfadden:.4f}")
110
111    if HAS_PLT:
112        # Decision boundary
113        x1_min, x1_max = x1.min() - 1, x1.max() + 1
114        x2_min, x2_max = x2.min() - 1, x2.max() + 1
115        xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max, 100),
116                               np.linspace(x2_min, x2_max, 100))
117        X_grid = np.column_stack([np.ones(xx1.ravel().shape), xx1.ravel(), xx2.ravel()])
118        Z = sigmoid(X_grid @ beta_hat).reshape(xx1.shape)
119
120        plt.figure(figsize=(10, 6))
121        plt.contourf(xx1, xx2, Z, levels=20, alpha=0.6, cmap='RdYlBu_r')
122        plt.colorbar(label='P(y=1)')
123        plt.contour(xx1, xx2, Z, levels=[0.5], colors='black', linewidths=2)
124        plt.scatter(x1[y == 0], x2[y == 0], c='blue', marker='o', alpha=0.6, label='Class 0')
125        plt.scatter(x1[y == 1], x2[y == 1], c='red', marker='s', alpha=0.6, label='Class 1')
126        plt.xlabel('x₁')
127        plt.ylabel('x₂')
128        plt.title('Logistic Regression Decision Boundary')
129        plt.legend()
130        plt.grid(True, alpha=0.3)
131        plt.savefig('/tmp/logistic_regression.png', dpi=100)
132        print("\n[Plot saved to /tmp/logistic_regression.png]")
133        plt.close()
134
135
def poisson_regression():
    """Fit a Poisson regression (log link) by maximum likelihood.

    Simulates counts from a known log-linear model, recovers the
    coefficients with BFGS, and reports deviance-based goodness of fit.
    Saves a fitted-curve plot when matplotlib is available.
    """
    print_section("2. Poisson Regression")

    # Generate count data from the true model log(λ) = β₀ + β₁*x.
    np.random.seed(123)
    n = 150
    x = np.random.uniform(0, 5, n)

    true_beta = np.array([0.5, 0.4])
    log_lambda = true_beta[0] + true_beta[1] * x
    lambda_true = np.exp(log_lambda)
    y = np.random.poisson(lambda_true)

    print(f"Generated Poisson data: n = {n}")
    print(f"True coefficients: {true_beta}")
    print(f"Response range: [{y.min()}, {y.max()}]")

    # Design matrix with intercept column.
    X = np.column_stack([np.ones(n), x])

    def neg_log_likelihood(beta):
        # Poisson log-likelihood: y*log(λ) - λ - log(y!); the log(y!)
        # term is constant in beta, so it is dropped for optimization.
        # Clip the linear predictor so np.exp cannot overflow to inf when
        # the BFGS line search probes extreme coefficients (mirrors the
        # clipping used in the logistic demo's sigmoid).
        log_lambda = np.clip(X @ beta, -500, 500)
        lambda_pred = np.exp(log_lambda)
        return -np.sum(y * log_lambda - lambda_pred)

    # Maximize the likelihood with BFGS.
    beta_init = np.zeros(2)
    result = minimize(neg_log_likelihood, beta_init, method='BFGS')
    beta_hat = result.x

    print(f"\nEstimated coefficients:")
    print(f"  β₀ (intercept): {beta_hat[0]:.4f}")
    print(f"  β₁ (slope): {beta_hat[1]:.4f}")

    # Fitted means.
    log_lambda_hat = X @ beta_hat
    lambda_hat = np.exp(log_lambda_hat)

    print(f"\nPredicted λ range: [{lambda_hat.min():.2f}, {lambda_hat.max():.2f}]")

    # Deviance: 2 * [loglik(saturated) - loglik(fitted)], where the
    # saturated model has λ_i = y_i and the null model λ_i = mean(y).
    # y_safe guards log(0); the y==0 terms contribute 0 to y*log(...)
    # anyway, so the substitution does not change the sum.
    y_safe = np.where(y == 0, 1, y)

    # Per-observation deviance contributions (squared deviance residuals).
    deviance_terms = 2 * (y * np.log(y_safe / lambda_hat) - (y - lambda_hat))
    deviance = np.sum(deviance_terms)

    lambda_null = np.mean(y)
    deviance_null = 2 * np.sum(y * np.log(y_safe / lambda_null) - (y - lambda_null))

    print(f"\nDeviance: {deviance:.2f}")
    print(f"Null deviance: {deviance_null:.2f}")

    # Deviance-based pseudo R² (fraction of null deviance explained).
    r2 = 1 - deviance / deviance_null
    print(f"Pseudo R²: {r2:.4f}")

    if HAS_PLT:
        x_plot = np.linspace(0, 5, 100)
        X_plot = np.column_stack([np.ones(len(x_plot)), x_plot])
        lambda_plot = np.exp(X_plot @ beta_hat)

        plt.figure(figsize=(10, 6))
        plt.scatter(x, y, alpha=0.6, label='Observed counts')
        plt.plot(x_plot, lambda_plot, 'r-', linewidth=2, label='Fitted λ(x)')
        plt.xlabel('x')
        plt.ylabel('Count')
        plt.title('Poisson Regression')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.savefig('/tmp/poisson_regression.png', dpi=100)
        print("\n[Plot saved to /tmp/poisson_regression.png]")
        plt.close()
215
216
def link_functions_demo():
    """Survey the standard GLM link functions and plot their inverses."""
    print_section("3. Link Functions")

    print("Common link functions in GLMs:\n")

    eta = np.linspace(-5, 5, 200)

    # (subplot title, inverse-link values μ(η), plot format, y-limits)
    curves = [
        ('Logit: μ = 1/(1+exp(-η))', 1 / (1 + np.exp(-eta)), 'b-', [-0.1, 1.1]),
        ('Probit: μ = Φ(η)', stats.norm.cdf(eta), 'g-', [-0.1, 1.1]),
        ('Log: μ = exp(η)', np.exp(eta), 'r-', [0, 20]),
        ('Identity: μ = η', eta, 'purple', None),
    ]

    print("1. Logit link: g(μ) = log(μ/(1-μ))")
    print("   Inverse: μ = 1/(1+exp(-η))")
    print("   Used in: Logistic regression\n")

    print("2. Probit link: g(μ) = Φ⁻¹(μ)")
    print("   Inverse: μ = Φ(η)")
    print("   Used in: Probit regression\n")

    print("3. Log link: g(μ) = log(μ)")
    print("   Inverse: μ = exp(η)")
    print("   Used in: Poisson regression\n")

    print("4. Identity link: g(μ) = μ")
    print("   Inverse: μ = η")
    print("   Used in: Linear regression\n")

    if HAS_PLT:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))

        # One subplot per inverse-link curve.
        for ax, (title, mu, fmt, ylim) in zip(axes.ravel(), curves):
            ax.plot(eta, mu, fmt, linewidth=2)
            ax.set_xlabel('η (linear predictor)')
            ax.set_ylabel('μ (mean)')
            ax.set_title(title)
            ax.grid(True, alpha=0.3)
            if ylim is not None:
                ax.set_ylim(ylim)

        plt.tight_layout()
        plt.savefig('/tmp/link_functions.png', dpi=100)
        print("[Plot saved to /tmp/link_functions.png]")
        plt.close()
287
288
def deviance_analysis():
    """Compare nested Poisson models via deviance, AIC/BIC and a LR test."""
    print_section("4. Deviance Analysis")

    # Simulate Poisson counts driven by two covariates.
    np.random.seed(456)
    n = 100
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)

    true_beta = np.array([0.5, 1.2, -0.8])
    y = np.random.poisson(np.exp(true_beta[0] + true_beta[1] * x1 + true_beta[2] * x2))

    print("Comparing nested Poisson regression models")

    # Nested design matrices: intercept-only, +x1, +x1+x2.
    models = [
        ("Null (intercept only)", np.ones((n, 1))),
        ("x1", np.column_stack([np.ones(n), x1])),
        ("x1 + x2", np.column_stack([np.ones(n), x1, x2])),
    ]

    results = []

    for name, X in models:
        def neg_ll(beta, X=X):
            # Poisson NLL up to the constant log(y!) term.
            eta = X @ beta
            return -np.sum(y * eta - np.exp(eta))

        fit = minimize(neg_ll, np.zeros(X.shape[1]), method='BFGS')
        beta_hat = fit.x

        # Deviance against the saturated model (λ_i = y_i); y_safe guards
        # log(0) — the y==0 terms contribute 0 to y*log(...) regardless.
        mu = np.exp(X @ beta_hat)
        y_safe = np.where(y == 0, 1, y)
        deviance = 2 * np.sum(y * np.log(y_safe / mu) - (y - mu))

        k = X.shape[1]  # number of fitted parameters
        log_lik = -neg_ll(beta_hat)
        results.append({
            'name': name,
            'k': k,
            'deviance': deviance,
            'aic': 2 * k - 2 * log_lik,
            'bic': k * np.log(n) - 2 * log_lik,
            'log_lik': log_lik,
        })

    print("\nModel comparison:")
    print(f"{'Model':<20} {'k':>3} {'Deviance':>12} {'AIC':>12} {'BIC':>12}")
    print("-" * 70)

    for r in results:
        print(f"{r['name']:<20} {r['k']:>3} {r['deviance']:>12.2f} {r['aic']:>12.2f} {r['bic']:>12.2f}")

    # Likelihood ratio test of the null model against the full model;
    # the dropped log(y!) constant cancels in the difference.
    lr_statistic = -2 * (results[0]['log_lik'] - results[2]['log_lik'])
    df = results[2]['k'] - results[0]['k']
    p_value = 1 - stats.chi2.cdf(lr_statistic, df)

    print("\nLikelihood ratio test (Null vs Full):")
    print(f"  LR statistic: {lr_statistic:.4f}")
    print(f"  df: {df}")
    print(f"  p-value: {p_value:.6f}")

    if p_value < 0.05:
        print("  Full model significantly better (p < 0.05)")

    best_aic_idx = np.argmin([r['aic'] for r in results])
    print(f"\nBest model by AIC: {results[best_aic_idx]['name']}")
376
377
def model_comparison_aic_bic():
    """Show how AIC and BIC rank logistic models with irrelevant predictors."""
    print_section("5. AIC/BIC Model Comparison")

    # Four candidate predictors; only x1 and x2 enter the true model.
    np.random.seed(789)
    n = 200
    x1 = np.random.normal(0, 1, n)
    x2 = np.random.normal(0, 1, n)
    x3 = np.random.normal(0, 1, n)
    x4 = np.random.normal(0, 1, n)  # pure noise

    z = -1 + 1.5*x1 + 0.8*x2
    p = 1 / (1 + np.exp(-z))
    y = (np.random.uniform(0, 1, n) < p).astype(int)

    print("True model: logit(p) = -1 + 1.5*x1 + 0.8*x2")
    print("x3, x4 are irrelevant")

    # Candidate models of increasing size.
    models = [
        ("x1", np.column_stack([np.ones(n), x1])),
        ("x1 + x2", np.column_stack([np.ones(n), x1, x2])),
        ("x1 + x2 + x3", np.column_stack([np.ones(n), x1, x2, x3])),
        ("x1 + x2 + x3 + x4", np.column_stack([np.ones(n), x1, x2, x3, x4]))
    ]

    def sigmoid(t):
        # Clipped so np.exp cannot overflow.
        return 1 / (1 + np.exp(-np.clip(t, -500, 500)))

    results = []

    for name, X in models:
        def neg_ll(beta, X=X):
            # Bernoulli NLL with probabilities clipped away from 0 and 1.
            mu = np.clip(sigmoid(X @ beta), 1e-15, 1 - 1e-15)
            return -np.sum(y * np.log(mu) + (1 - y) * np.log(1 - mu))

        fit = minimize(neg_ll, np.zeros(X.shape[1]), method='BFGS')

        k = X.shape[1]
        log_lik = -neg_ll(fit.x)
        results.append({'name': name, 'k': k,
                        'aic': 2 * k - 2 * log_lik,
                        'bic': k * np.log(n) - 2 * log_lik})

    print(f"\n{'Model':<20} {'k':>3} {'AIC':>12} {'BIC':>12}")
    print("-" * 50)

    for r in results:
        print(f"{r['name']:<20} {r['k']:>3} {r['aic']:>12.2f} {r['bic']:>12.2f}")

    # Pick the winner under each criterion.
    best_aic_idx = min(range(len(results)), key=lambda i: results[i]['aic'])
    best_bic_idx = min(range(len(results)), key=lambda i: results[i]['bic'])

    print(f"\nBest model by AIC: {results[best_aic_idx]['name']}")
    print(f"Best model by BIC: {results[best_bic_idx]['name']}")

    print("\nInterpretation:")
    print("  AIC prefers predictive accuracy")
    print("  BIC penalizes complexity more (prefers simpler models)")
    print("  Both should select x1+x2 (true model)")
444
445
def main():
    """Run every GLM demonstration in sequence."""
    banner = "=" * 70
    print(banner)
    print("  GENERALIZED LINEAR MODELS DEMONSTRATIONS")
    print(banner)

    np.random.seed(42)

    # Each demo re-seeds itself, so order does not affect results.
    for demo in (logistic_regression_scratch,
                 poisson_regression,
                 link_functions_demo,
                 deviance_analysis,
                 model_comparison_aic_bic):
        demo()

    print("\n" + banner)
    print("  All demonstrations completed successfully!")
    print(banner)
463
464
if __name__ == "__main__":
    main()