01_probability_review.py

  1"""
  201_probability_review.py
  3
  4Demonstrates fundamental probability concepts:
  5- Common probability distributions (Normal, Binomial, Poisson, Exponential)
  6- PDF/CDF plotting
  7- Central Limit Theorem
  8- Law of Large Numbers
  9- Random sampling with numpy/scipy
 10"""
 11
 12import numpy as np
 13from scipy import stats
 14
 15# Optional plotting
 16try:
 17    import matplotlib.pyplot as plt
 18    HAS_PLT = True
 19except ImportError:
 20    HAS_PLT = False
 21    print("matplotlib not available; skipping plots\n")
 22
 23
 24def print_section(title):
 25    """Print formatted section header."""
 26    print("\n" + "=" * 70)
 27    print(f"  {title}")
 28    print("=" * 70)
 29
 30
 31def normal_distribution():
 32    """Demonstrate normal distribution properties."""
 33    print_section("1. Normal Distribution")
 34
 35    mu, sigma = 100, 15
 36    print(f"Normal distribution: μ={mu}, σ={sigma}")
 37
 38    # Generate samples
 39    samples = np.random.normal(mu, sigma, 10000)
 40    print(f"Generated {len(samples)} samples")
 41    print(f"Sample mean: {np.mean(samples):.2f}")
 42    print(f"Sample std: {np.std(samples, ddof=1):.2f}")
 43
 44    # PDF and CDF at specific points
 45    x_values = [70, 85, 100, 115, 130]
 46    print("\nPDF and CDF values:")
 47    for x in x_values:
 48        pdf = stats.norm.pdf(x, mu, sigma)
 49        cdf = stats.norm.cdf(x, mu, sigma)
 50        print(f"  x={x:3d}: PDF={pdf:.6f}, CDF={cdf:.4f}")
 51
 52    # Percentiles
 53    percentiles = [0.025, 0.25, 0.50, 0.75, 0.975]
 54    print("\nPercentiles:")
 55    for p in percentiles:
 56        val = stats.norm.ppf(p, mu, sigma)
 57        print(f"  {p*100:5.1f}%: {val:.2f}")
 58
 59    if HAS_PLT:
 60        x = np.linspace(mu - 4*sigma, mu + 4*sigma, 200)
 61        pdf = stats.norm.pdf(x, mu, sigma)
 62        cdf = stats.norm.cdf(x, mu, sigma)
 63
 64        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
 65        ax1.plot(x, pdf, 'b-', label=f'PDF (μ={mu}, σ={sigma})')
 66        ax1.fill_between(x, pdf, alpha=0.3)
 67        ax1.set_xlabel('x')
 68        ax1.set_ylabel('Probability Density')
 69        ax1.set_title('Normal Distribution PDF')
 70        ax1.legend()
 71        ax1.grid(True, alpha=0.3)
 72
 73        ax2.plot(x, cdf, 'r-', label='CDF')
 74        ax2.set_xlabel('x')
 75        ax2.set_ylabel('Cumulative Probability')
 76        ax2.set_title('Normal Distribution CDF')
 77        ax2.legend()
 78        ax2.grid(True, alpha=0.3)
 79
 80        plt.tight_layout()
 81        plt.savefig('/tmp/normal_dist.png', dpi=100)
 82        print("\n[Plot saved to /tmp/normal_dist.png]")
 83        plt.close()
 84
 85
 86def binomial_distribution():
 87    """Demonstrate binomial distribution."""
 88    print_section("2. Binomial Distribution")
 89
 90    n, p = 20, 0.3
 91    print(f"Binomial distribution: n={n}, p={p}")
 92
 93    # Theoretical properties
 94    mean = n * p
 95    variance = n * p * (1 - p)
 96    print(f"Theoretical mean: {mean:.2f}")
 97    print(f"Theoretical variance: {variance:.2f}")
 98
 99    # Generate samples
100    samples = np.random.binomial(n, p, 10000)
101    print(f"\nSample mean: {np.mean(samples):.2f}")
102    print(f"Sample variance: {np.var(samples, ddof=1):.2f}")
103
104    # PMF for different k values
105    print("\nProbability mass function:")
106    for k in range(0, n+1, 4):
107        pmf = stats.binom.pmf(k, n, p)
108        print(f"  P(X={k:2d}) = {pmf:.6f}")
109
110    # CDF
111    print("\nCumulative probabilities:")
112    for k in [3, 6, 9, 12]:
113        cdf = stats.binom.cdf(k, n, p)
114        print(f"  P(X≤{k:2d}) = {cdf:.6f}")
115
116    if HAS_PLT:
117        k = np.arange(0, n+1)
118        pmf = stats.binom.pmf(k, n, p)
119
120        plt.figure(figsize=(10, 5))
121        plt.bar(k, pmf, alpha=0.7, color='green', edgecolor='black')
122        plt.axvline(mean, color='red', linestyle='--', label=f'Mean={mean:.1f}')
123        plt.xlabel('k (number of successes)')
124        plt.ylabel('Probability')
125        plt.title(f'Binomial Distribution PMF (n={n}, p={p})')
126        plt.legend()
127        plt.grid(True, alpha=0.3)
128        plt.savefig('/tmp/binomial_dist.png', dpi=100)
129        print("\n[Plot saved to /tmp/binomial_dist.png]")
130        plt.close()
131
132
133def poisson_exponential():
134    """Demonstrate Poisson and Exponential distributions."""
135    print_section("3. Poisson and Exponential Distributions")
136
137    # Poisson
138    lambda_poisson = 5
139    print(f"Poisson distribution: λ={lambda_poisson}")
140
141    samples_poisson = np.random.poisson(lambda_poisson, 10000)
142    print(f"Sample mean: {np.mean(samples_poisson):.2f}")
143    print(f"Sample variance: {np.var(samples_poisson, ddof=1):.2f}")
144
145    print("\nPoisson PMF:")
146    for k in range(0, 11):
147        pmf = stats.poisson.pmf(k, lambda_poisson)
148        print(f"  P(X={k:2d}) = {pmf:.6f}")
149
150    # Exponential
151    lambda_exp = 0.5
152    print(f"\nExponential distribution: λ={lambda_exp}")
153
154    samples_exp = np.random.exponential(1/lambda_exp, 10000)
155    print(f"Theoretical mean: {1/lambda_exp:.2f}")
156    print(f"Sample mean: {np.mean(samples_exp):.2f}")
157
158    print("\nExponential CDF:")
159    for x in [0.5, 1.0, 2.0, 3.0, 5.0]:
160        cdf = stats.expon.cdf(x, scale=1/lambda_exp)
161        print(f"  P(X≤{x:.1f}) = {cdf:.6f}")
162
163    if HAS_PLT:
164        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
165
166        # Poisson
167        k = np.arange(0, 15)
168        pmf = stats.poisson.pmf(k, lambda_poisson)
169        ax1.bar(k, pmf, alpha=0.7, color='purple', edgecolor='black')
170        ax1.set_xlabel('k')
171        ax1.set_ylabel('Probability')
172        ax1.set_title(f'Poisson PMF (λ={lambda_poisson})')
173        ax1.grid(True, alpha=0.3)
174
175        # Exponential
176        x = np.linspace(0, 10, 200)
177        pdf = stats.expon.pdf(x, scale=1/lambda_exp)
178        ax2.plot(x, pdf, 'orange', linewidth=2)
179        ax2.fill_between(x, pdf, alpha=0.3, color='orange')
180        ax2.set_xlabel('x')
181        ax2.set_ylabel('Probability Density')
182        ax2.set_title(f'Exponential PDF (λ={lambda_exp})')
183        ax2.grid(True, alpha=0.3)
184
185        plt.tight_layout()
186        plt.savefig('/tmp/poisson_exp_dist.png', dpi=100)
187        print("\n[Plot saved to /tmp/poisson_exp_dist.png]")
188        plt.close()
189
190
191def central_limit_theorem():
192    """Demonstrate Central Limit Theorem."""
193    print_section("4. Central Limit Theorem")
194
195    # Use uniform distribution (clearly non-normal)
196    population = np.random.uniform(0, 10, 100000)
197    print(f"Population distribution: Uniform(0, 10)")
198    print(f"Population mean: {np.mean(population):.2f}")
199    print(f"Population std: {np.std(population):.2f}")
200
201    sample_sizes = [2, 5, 10, 30]
202    n_samples = 5000
203
204    print(f"\nSampling distribution of means (n_samples={n_samples}):")
205
206    sample_means_dict = {}
207    for n in sample_sizes:
208        sample_means = []
209        for _ in range(n_samples):
210            sample = np.random.choice(population, n, replace=True)
211            sample_means.append(np.mean(sample))
212
213        sample_means = np.array(sample_means)
214        sample_means_dict[n] = sample_means
215
216        print(f"\n  Sample size n={n}:")
217        print(f"    Mean of sample means: {np.mean(sample_means):.2f}")
218        print(f"    Std of sample means: {np.std(sample_means):.2f}")
219        print(f"    Theoretical std: {np.std(population)/np.sqrt(n):.2f}")
220
221    if HAS_PLT:
222        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
223        axes = axes.ravel()
224
225        for idx, n in enumerate(sample_sizes):
226            axes[idx].hist(sample_means_dict[n], bins=50, density=True,
227                          alpha=0.7, color='skyblue', edgecolor='black')
228
229            # Overlay normal distribution
230            mu = np.mean(sample_means_dict[n])
231            sigma = np.std(sample_means_dict[n])
232            x = np.linspace(mu - 4*sigma, mu + 4*sigma, 100)
233            axes[idx].plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2,
234                          label='Normal fit')
235
236            axes[idx].set_xlabel('Sample Mean')
237            axes[idx].set_ylabel('Density')
238            axes[idx].set_title(f'Sample Size n={n}')
239            axes[idx].legend()
240            axes[idx].grid(True, alpha=0.3)
241
242        plt.tight_layout()
243        plt.savefig('/tmp/clt_demo.png', dpi=100)
244        print("\n[Plot saved to /tmp/clt_demo.png]")
245        plt.close()
246
247
248def law_of_large_numbers():
249    """Demonstrate Law of Large Numbers."""
250    print_section("5. Law of Large Numbers")
251
252    # Die rolling example
253    true_mean = 3.5
254    print(f"Rolling a fair die (theoretical mean = {true_mean})")
255
256    n_rolls = 10000
257    rolls = np.random.randint(1, 7, n_rolls)
258
259    # Cumulative means
260    cumulative_means = np.cumsum(rolls) / np.arange(1, n_rolls + 1)
261
262    # Check convergence at different points
263    checkpoints = [10, 100, 1000, 5000, 10000]
264    print("\nCumulative mean convergence:")
265    for n in checkpoints:
266        mean_n = cumulative_means[n-1]
267        error = abs(mean_n - true_mean)
268        print(f"  n={n:5d}: mean={mean_n:.4f}, error={error:.4f}")
269
270    if HAS_PLT:
271        plt.figure(figsize=(12, 5))
272        plt.plot(cumulative_means, 'b-', alpha=0.7, linewidth=1)
273        plt.axhline(true_mean, color='red', linestyle='--', linewidth=2,
274                   label=f'True mean = {true_mean}')
275        plt.xlabel('Number of rolls')
276        plt.ylabel('Cumulative mean')
277        plt.title('Law of Large Numbers: Die Rolling')
278        plt.legend()
279        plt.grid(True, alpha=0.3)
280        plt.savefig('/tmp/lln_demo.png', dpi=100)
281        print("\n[Plot saved to /tmp/lln_demo.png]")
282        plt.close()
283
284
285def main():
286    """Run all demonstrations."""
287    print("=" * 70)
288    print("  PROBABILITY REVIEW DEMONSTRATIONS")
289    print("=" * 70)
290
291    np.random.seed(42)
292
293    normal_distribution()
294    binomial_distribution()
295    poisson_exponential()
296    central_limit_theorem()
297    law_of_large_numbers()
298
299    print("\n" + "=" * 70)
300    print("  All demonstrations completed successfully!")
301    print("=" * 70)
302
303
304if __name__ == "__main__":
305    main()