03_hypothesis_testing.py

Download
python 381 lines 11.2 KB
  1"""
  203_hypothesis_testing.py
  3
  4Demonstrates hypothesis testing methods:
  5- t-tests (one-sample, two-sample, paired)
  6- Chi-square tests
  7- ANOVA (one-way)
  8- p-value computation
  9- Statistical power analysis
 10- Multiple testing correction (Bonferroni, FDR)
 11"""
 12
 13import numpy as np
 14from scipy import stats
 15
 16try:
 17    import matplotlib.pyplot as plt
 18    HAS_PLT = True
 19except ImportError:
 20    HAS_PLT = False
 21    print("matplotlib not available; skipping plots\n")
 22
 23
 24def print_section(title):
 25    """Print formatted section header."""
 26    print("\n" + "=" * 70)
 27    print(f"  {title}")
 28    print("=" * 70)
 29
 30
 31def one_sample_t_test():
 32    """Demonstrate one-sample t-test."""
 33    print_section("1. One-Sample t-Test")
 34
 35    # Known population mean
 36    mu_0 = 100
 37
 38    # Generate sample with different true mean
 39    np.random.seed(42)
 40    sample = np.random.normal(105, 15, 50)
 41
 42    print(f"Null hypothesis: μ = {mu_0}")
 43    print(f"Alternative hypothesis: μ ≠ {mu_0}")
 44    print(f"\nSample size: {len(sample)}")
 45    print(f"Sample mean: {np.mean(sample):.2f}")
 46    print(f"Sample std: {np.std(sample, ddof=1):.2f}")
 47
 48    # Perform t-test
 49    t_stat, p_value = stats.ttest_1samp(sample, mu_0)
 50
 51    print(f"\nTest results:")
 52    print(f"  t-statistic: {t_stat:.4f}")
 53    print(f"  p-value: {p_value:.6f}")
 54    print(f"  degrees of freedom: {len(sample) - 1}")
 55
 56    alpha = 0.05
 57    print(f"\nDecision (α={alpha}):")
 58    if p_value < alpha:
 59        print(f"  Reject H₀ (p={p_value:.6f} < {alpha})")
 60        print(f"  Evidence suggests μ ≠ {mu_0}")
 61    else:
 62        print(f"  Fail to reject H₀ (p={p_value:.6f}{alpha})")
 63        print(f"  Insufficient evidence against μ = {mu_0}")
 64
 65    # Confidence interval
 66    ci = stats.t.interval(0.95, len(sample)-1,
 67                         loc=np.mean(sample),
 68                         scale=stats.sem(sample))
 69    print(f"\n95% Confidence Interval: [{ci[0]:.2f}, {ci[1]:.2f}]")
 70
 71
 72def two_sample_t_test():
 73    """Demonstrate two-sample t-test."""
 74    print_section("2. Two-Sample t-Test")
 75
 76    # Generate two samples
 77    np.random.seed(123)
 78    group1 = np.random.normal(100, 15, 40)
 79    group2 = np.random.normal(108, 15, 45)
 80
 81    print("Group 1:")
 82    print(f"  n = {len(group1)}")
 83    print(f"  mean = {np.mean(group1):.2f}")
 84    print(f"  std = {np.std(group1, ddof=1):.2f}")
 85
 86    print("\nGroup 2:")
 87    print(f"  n = {len(group2)}")
 88    print(f"  mean = {np.mean(group2):.2f}")
 89    print(f"  std = {np.std(group2, ddof=1):.2f}")
 90
 91    # Independent samples t-test (equal variance assumed)
 92    t_stat, p_value = stats.ttest_ind(group1, group2)
 93
 94    print(f"\nIndependent samples t-test (equal variance):")
 95    print(f"  t-statistic: {t_stat:.4f}")
 96    print(f"  p-value: {p_value:.6f}")
 97
 98    # Welch's t-test (unequal variance)
 99    t_stat_welch, p_value_welch = stats.ttest_ind(group1, group2, equal_var=False)
100
101    print(f"\nWelch's t-test (unequal variance):")
102    print(f"  t-statistic: {t_stat_welch:.4f}")
103    print(f"  p-value: {p_value_welch:.6f}")
104
105    # Effect size (Cohen's d)
106    pooled_std = np.sqrt((np.var(group1, ddof=1) + np.var(group2, ddof=1)) / 2)
107    cohens_d = (np.mean(group2) - np.mean(group1)) / pooled_std
108    print(f"\nEffect size (Cohen's d): {cohens_d:.4f}")
109
110
111def paired_t_test():
112    """Demonstrate paired t-test."""
113    print_section("3. Paired t-Test")
114
115    # Generate paired data (before/after)
116    np.random.seed(456)
117    n = 30
118    before = np.random.normal(120, 20, n)
119    treatment_effect = np.random.normal(10, 5, n)
120    after = before + treatment_effect
121
122    print(f"Sample size: {n} pairs")
123    print(f"\nBefore treatment:")
124    print(f"  Mean: {np.mean(before):.2f}")
125    print(f"  Std: {np.std(before, ddof=1):.2f}")
126
127    print(f"\nAfter treatment:")
128    print(f"  Mean: {np.mean(after):.2f}")
129    print(f"  Std: {np.std(after, ddof=1):.2f}")
130
131    # Paired t-test
132    t_stat, p_value = stats.ttest_rel(after, before)
133
134    differences = after - before
135    print(f"\nDifferences (after - before):")
136    print(f"  Mean: {np.mean(differences):.2f}")
137    print(f"  Std: {np.std(differences, ddof=1):.2f}")
138
139    print(f"\nPaired t-test results:")
140    print(f"  t-statistic: {t_stat:.4f}")
141    print(f"  p-value: {p_value:.6f}")
142
143    if p_value < 0.05:
144        print(f"\n  Significant difference detected (p < 0.05)")
145    else:
146        print(f"\n  No significant difference (p ≥ 0.05)")
147
148
149def chi_square_test():
150    """Demonstrate chi-square test."""
151    print_section("4. Chi-Square Test")
152
153    # Goodness of fit test
154    print("Chi-square goodness of fit test:")
155    print("Testing if a die is fair\n")
156
157    observed = np.array([48, 52, 45, 53, 47, 55])
158    expected = np.array([50, 50, 50, 50, 50, 50])
159
160    print("Observed frequencies:", observed)
161    print("Expected frequencies:", expected)
162
163    chi2_stat, p_value = stats.chisquare(observed, expected)
164
165    print(f"\nχ² statistic: {chi2_stat:.4f}")
166    print(f"p-value: {p_value:.6f}")
167    print(f"Degrees of freedom: {len(observed) - 1}")
168
169    if p_value > 0.05:
170        print(f"\nNo evidence against fairness (p = {p_value:.4f})")
171
172    # Contingency table test
173    print("\n" + "-" * 70)
174    print("Chi-square test of independence:")
175    print("Testing relationship between treatment and outcome\n")
176
177    # Contingency table: rows=treatment, columns=outcome
178    contingency_table = np.array([
179        [20, 30],  # Treatment A: success, failure
180        [35, 15]   # Treatment B: success, failure
181    ])
182
183    print("Contingency table:")
184    print("                Success  Failure")
185    print(f"Treatment A:       {contingency_table[0,0]}       {contingency_table[0,1]}")
186    print(f"Treatment B:       {contingency_table[1,0]}       {contingency_table[1,1]}")
187
188    chi2_stat, p_value, dof, expected_freq = stats.chi2_contingency(contingency_table)
189
190    print(f"\nχ² statistic: {chi2_stat:.4f}")
191    print(f"p-value: {p_value:.6f}")
192    print(f"Degrees of freedom: {dof}")
193
194    print("\nExpected frequencies:")
195    print(expected_freq)
196
197    if p_value < 0.05:
198        print(f"\nSignificant association detected (p = {p_value:.4f})")
199
200
201def one_way_anova():
202    """Demonstrate one-way ANOVA."""
203    print_section("5. One-Way ANOVA")
204
205    # Generate three groups with different means
206    np.random.seed(789)
207    group1 = np.random.normal(100, 15, 30)
208    group2 = np.random.normal(105, 15, 30)
209    group3 = np.random.normal(112, 15, 30)
210
211    print("Group statistics:")
212    for i, group in enumerate([group1, group2, group3], 1):
213        print(f"\n  Group {i}:")
214        print(f"    n = {len(group)}")
215        print(f"    mean = {np.mean(group):.2f}")
216        print(f"    std = {np.std(group, ddof=1):.2f}")
217
218    # Perform ANOVA
219    f_stat, p_value = stats.f_oneway(group1, group2, group3)
220
221    print(f"\nOne-way ANOVA results:")
222    print(f"  F-statistic: {f_stat:.4f}")
223    print(f"  p-value: {p_value:.6f}")
224
225    if p_value < 0.05:
226        print(f"\n  Significant differences among groups (p < 0.05)")
227        print(f"  At least one group mean differs from others")
228    else:
229        print(f"\n  No significant differences (p ≥ 0.05)")
230
231    # Calculate effect size (eta-squared)
232    all_data = np.concatenate([group1, group2, group3])
233    grand_mean = np.mean(all_data)
234
235    ss_between = sum(len(g) * (np.mean(g) - grand_mean)**2
236                    for g in [group1, group2, group3])
237    ss_total = np.sum((all_data - grand_mean)**2)
238    eta_squared = ss_between / ss_total
239
240    print(f"\nEffect size (η²): {eta_squared:.4f}")
241
242
243def power_analysis():
244    """Demonstrate statistical power analysis."""
245    print_section("6. Statistical Power Analysis")
246
247    # Parameters
248    effect_size = 0.5  # Cohen's d
249    alpha = 0.05
250    sample_sizes = [10, 20, 30, 50, 100, 200]
251
252    print(f"Effect size (Cohen's d): {effect_size}")
253    print(f"Significance level (α): {alpha}")
254    print(f"\nPower analysis for two-sample t-test:")
255
256    # Simulate power for different sample sizes
257    print(f"\n  {'n':>5} {'Power':>8} {'Description':>15}")
258    print(f"  {'-'*5} {'-'*8} {'-'*15}")
259
260    for n in sample_sizes:
261        # Monte Carlo estimation of power
262        n_simulations = 5000
263        rejections = 0
264
265        for _ in range(n_simulations):
266            # Generate data with true effect
267            group1 = np.random.normal(0, 1, n)
268            group2 = np.random.normal(effect_size, 1, n)
269
270            _, p_value = stats.ttest_ind(group1, group2)
271            if p_value < alpha:
272                rejections += 1
273
274        power = rejections / n_simulations
275
276        # Power interpretation
277        if power < 0.5:
278            desc = "Underpowered"
279        elif power < 0.8:
280            desc = "Moderate"
281        elif power < 0.95:
282            desc = "Good"
283        else:
284            desc = "Excellent"
285
286        print(f"  {n:5d} {power:8.3f} {desc:>15}")
287
288
289def multiple_testing_correction():
290    """Demonstrate multiple testing correction."""
291    print_section("7. Multiple Testing Correction")
292
293    # Simulate multiple hypothesis tests
294    np.random.seed(999)
295    n_tests = 20
296    alpha = 0.05
297
298    print(f"Number of tests: {n_tests}")
299    print(f"Nominal α: {alpha}")
300
301    # Generate p-values (15 null true, 5 alternative true)
302    p_values = []
303
304    # Null true (no effect)
305    for _ in range(15):
306        sample1 = np.random.normal(0, 1, 30)
307        sample2 = np.random.normal(0, 1, 30)
308        _, p = stats.ttest_ind(sample1, sample2)
309        p_values.append(p)
310
311    # Alternative true (real effect)
312    for _ in range(5):
313        sample1 = np.random.normal(0, 1, 30)
314        sample2 = np.random.normal(0.8, 1, 30)
315        _, p = stats.ttest_ind(sample1, sample2)
316        p_values.append(p)
317
318    p_values = np.array(p_values)
319
320    print(f"\nUncorrected results:")
321    print(f"  Significant tests (p < {alpha}): {np.sum(p_values < alpha)}")
322    print(f"  Smallest p-value: {np.min(p_values):.6f}")
323    print(f"  Largest p-value: {np.max(p_values):.6f}")
324
325    # Bonferroni correction
326    bonferroni_alpha = alpha / n_tests
327    bonferroni_rejections = np.sum(p_values < bonferroni_alpha)
328
329    print(f"\nBonferroni correction:")
330    print(f"  Adjusted α: {bonferroni_alpha:.6f}")
331    print(f"  Significant tests: {bonferroni_rejections}")
332
333    # Benjamini-Hochberg (FDR)
334    sorted_p = np.sort(p_values)
335    sorted_indices = np.argsort(p_values)
336
337    # Find largest i where p(i) <= (i/n) * alpha
338    fdr_threshold = 0
339    fdr_rejections = 0
340
341    for i in range(n_tests):
342        threshold = ((i + 1) / n_tests) * alpha
343        if sorted_p[i] <= threshold:
344            fdr_threshold = sorted_p[i]
345            fdr_rejections = i + 1
346
347    print(f"\nBenjamini-Hochberg (FDR) correction:")
348    print(f"  FDR level: {alpha}")
349    print(f"  Threshold p-value: {fdr_threshold:.6f}")
350    print(f"  Significant tests: {fdr_rejections}")
351
352    print(f"\nSummary:")
353    print(f"  No correction: {np.sum(p_values < alpha)} rejections")
354    print(f"  Bonferroni: {bonferroni_rejections} rejections (conservative)")
355    print(f"  FDR: {fdr_rejections} rejections (balanced)")
356
357
358def main():
359    """Run all demonstrations."""
360    print("=" * 70)
361    print("  HYPOTHESIS TESTING DEMONSTRATIONS")
362    print("=" * 70)
363
364    np.random.seed(42)
365
366    one_sample_t_test()
367    two_sample_t_test()
368    paired_t_test()
369    chi_square_test()
370    one_way_anova()
371    power_analysis()
372    multiple_testing_correction()
373
374    print("\n" + "=" * 70)
375    print("  All demonstrations completed successfully!")
376    print("=" * 70)
377
378
379if __name__ == "__main__":
380    main()