1"""
201_probability_review.py
3
4Demonstrates fundamental probability concepts:
5- Common probability distributions (Normal, Binomial, Poisson, Exponential)
6- PDF/CDF plotting
7- Central Limit Theorem
8- Law of Large Numbers
9- Random sampling with numpy/scipy
10"""
11
12import numpy as np
13from scipy import stats
14
15# Optional plotting
16try:
17 import matplotlib.pyplot as plt
18 HAS_PLT = True
19except ImportError:
20 HAS_PLT = False
21 print("matplotlib not available; skipping plots\n")
22
23
24def print_section(title):
25 """Print formatted section header."""
26 print("\n" + "=" * 70)
27 print(f" {title}")
28 print("=" * 70)
29
30
31def normal_distribution():
32 """Demonstrate normal distribution properties."""
33 print_section("1. Normal Distribution")
34
35 mu, sigma = 100, 15
36 print(f"Normal distribution: μ={mu}, σ={sigma}")
37
38 # Generate samples
39 samples = np.random.normal(mu, sigma, 10000)
40 print(f"Generated {len(samples)} samples")
41 print(f"Sample mean: {np.mean(samples):.2f}")
42 print(f"Sample std: {np.std(samples, ddof=1):.2f}")
43
44 # PDF and CDF at specific points
45 x_values = [70, 85, 100, 115, 130]
46 print("\nPDF and CDF values:")
47 for x in x_values:
48 pdf = stats.norm.pdf(x, mu, sigma)
49 cdf = stats.norm.cdf(x, mu, sigma)
50 print(f" x={x:3d}: PDF={pdf:.6f}, CDF={cdf:.4f}")
51
52 # Percentiles
53 percentiles = [0.025, 0.25, 0.50, 0.75, 0.975]
54 print("\nPercentiles:")
55 for p in percentiles:
56 val = stats.norm.ppf(p, mu, sigma)
57 print(f" {p*100:5.1f}%: {val:.2f}")
58
59 if HAS_PLT:
60 x = np.linspace(mu - 4*sigma, mu + 4*sigma, 200)
61 pdf = stats.norm.pdf(x, mu, sigma)
62 cdf = stats.norm.cdf(x, mu, sigma)
63
64 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
65 ax1.plot(x, pdf, 'b-', label=f'PDF (μ={mu}, σ={sigma})')
66 ax1.fill_between(x, pdf, alpha=0.3)
67 ax1.set_xlabel('x')
68 ax1.set_ylabel('Probability Density')
69 ax1.set_title('Normal Distribution PDF')
70 ax1.legend()
71 ax1.grid(True, alpha=0.3)
72
73 ax2.plot(x, cdf, 'r-', label='CDF')
74 ax2.set_xlabel('x')
75 ax2.set_ylabel('Cumulative Probability')
76 ax2.set_title('Normal Distribution CDF')
77 ax2.legend()
78 ax2.grid(True, alpha=0.3)
79
80 plt.tight_layout()
81 plt.savefig('/tmp/normal_dist.png', dpi=100)
82 print("\n[Plot saved to /tmp/normal_dist.png]")
83 plt.close()
84
85
86def binomial_distribution():
87 """Demonstrate binomial distribution."""
88 print_section("2. Binomial Distribution")
89
90 n, p = 20, 0.3
91 print(f"Binomial distribution: n={n}, p={p}")
92
93 # Theoretical properties
94 mean = n * p
95 variance = n * p * (1 - p)
96 print(f"Theoretical mean: {mean:.2f}")
97 print(f"Theoretical variance: {variance:.2f}")
98
99 # Generate samples
100 samples = np.random.binomial(n, p, 10000)
101 print(f"\nSample mean: {np.mean(samples):.2f}")
102 print(f"Sample variance: {np.var(samples, ddof=1):.2f}")
103
104 # PMF for different k values
105 print("\nProbability mass function:")
106 for k in range(0, n+1, 4):
107 pmf = stats.binom.pmf(k, n, p)
108 print(f" P(X={k:2d}) = {pmf:.6f}")
109
110 # CDF
111 print("\nCumulative probabilities:")
112 for k in [3, 6, 9, 12]:
113 cdf = stats.binom.cdf(k, n, p)
114 print(f" P(X≤{k:2d}) = {cdf:.6f}")
115
116 if HAS_PLT:
117 k = np.arange(0, n+1)
118 pmf = stats.binom.pmf(k, n, p)
119
120 plt.figure(figsize=(10, 5))
121 plt.bar(k, pmf, alpha=0.7, color='green', edgecolor='black')
122 plt.axvline(mean, color='red', linestyle='--', label=f'Mean={mean:.1f}')
123 plt.xlabel('k (number of successes)')
124 plt.ylabel('Probability')
125 plt.title(f'Binomial Distribution PMF (n={n}, p={p})')
126 plt.legend()
127 plt.grid(True, alpha=0.3)
128 plt.savefig('/tmp/binomial_dist.png', dpi=100)
129 print("\n[Plot saved to /tmp/binomial_dist.png]")
130 plt.close()
131
132
133def poisson_exponential():
134 """Demonstrate Poisson and Exponential distributions."""
135 print_section("3. Poisson and Exponential Distributions")
136
137 # Poisson
138 lambda_poisson = 5
139 print(f"Poisson distribution: λ={lambda_poisson}")
140
141 samples_poisson = np.random.poisson(lambda_poisson, 10000)
142 print(f"Sample mean: {np.mean(samples_poisson):.2f}")
143 print(f"Sample variance: {np.var(samples_poisson, ddof=1):.2f}")
144
145 print("\nPoisson PMF:")
146 for k in range(0, 11):
147 pmf = stats.poisson.pmf(k, lambda_poisson)
148 print(f" P(X={k:2d}) = {pmf:.6f}")
149
150 # Exponential
151 lambda_exp = 0.5
152 print(f"\nExponential distribution: λ={lambda_exp}")
153
154 samples_exp = np.random.exponential(1/lambda_exp, 10000)
155 print(f"Theoretical mean: {1/lambda_exp:.2f}")
156 print(f"Sample mean: {np.mean(samples_exp):.2f}")
157
158 print("\nExponential CDF:")
159 for x in [0.5, 1.0, 2.0, 3.0, 5.0]:
160 cdf = stats.expon.cdf(x, scale=1/lambda_exp)
161 print(f" P(X≤{x:.1f}) = {cdf:.6f}")
162
163 if HAS_PLT:
164 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
165
166 # Poisson
167 k = np.arange(0, 15)
168 pmf = stats.poisson.pmf(k, lambda_poisson)
169 ax1.bar(k, pmf, alpha=0.7, color='purple', edgecolor='black')
170 ax1.set_xlabel('k')
171 ax1.set_ylabel('Probability')
172 ax1.set_title(f'Poisson PMF (λ={lambda_poisson})')
173 ax1.grid(True, alpha=0.3)
174
175 # Exponential
176 x = np.linspace(0, 10, 200)
177 pdf = stats.expon.pdf(x, scale=1/lambda_exp)
178 ax2.plot(x, pdf, 'orange', linewidth=2)
179 ax2.fill_between(x, pdf, alpha=0.3, color='orange')
180 ax2.set_xlabel('x')
181 ax2.set_ylabel('Probability Density')
182 ax2.set_title(f'Exponential PDF (λ={lambda_exp})')
183 ax2.grid(True, alpha=0.3)
184
185 plt.tight_layout()
186 plt.savefig('/tmp/poisson_exp_dist.png', dpi=100)
187 print("\n[Plot saved to /tmp/poisson_exp_dist.png]")
188 plt.close()
189
190
191def central_limit_theorem():
192 """Demonstrate Central Limit Theorem."""
193 print_section("4. Central Limit Theorem")
194
195 # Use uniform distribution (clearly non-normal)
196 population = np.random.uniform(0, 10, 100000)
197 print(f"Population distribution: Uniform(0, 10)")
198 print(f"Population mean: {np.mean(population):.2f}")
199 print(f"Population std: {np.std(population):.2f}")
200
201 sample_sizes = [2, 5, 10, 30]
202 n_samples = 5000
203
204 print(f"\nSampling distribution of means (n_samples={n_samples}):")
205
206 sample_means_dict = {}
207 for n in sample_sizes:
208 sample_means = []
209 for _ in range(n_samples):
210 sample = np.random.choice(population, n, replace=True)
211 sample_means.append(np.mean(sample))
212
213 sample_means = np.array(sample_means)
214 sample_means_dict[n] = sample_means
215
216 print(f"\n Sample size n={n}:")
217 print(f" Mean of sample means: {np.mean(sample_means):.2f}")
218 print(f" Std of sample means: {np.std(sample_means):.2f}")
219 print(f" Theoretical std: {np.std(population)/np.sqrt(n):.2f}")
220
221 if HAS_PLT:
222 fig, axes = plt.subplots(2, 2, figsize=(12, 10))
223 axes = axes.ravel()
224
225 for idx, n in enumerate(sample_sizes):
226 axes[idx].hist(sample_means_dict[n], bins=50, density=True,
227 alpha=0.7, color='skyblue', edgecolor='black')
228
229 # Overlay normal distribution
230 mu = np.mean(sample_means_dict[n])
231 sigma = np.std(sample_means_dict[n])
232 x = np.linspace(mu - 4*sigma, mu + 4*sigma, 100)
233 axes[idx].plot(x, stats.norm.pdf(x, mu, sigma), 'r-', linewidth=2,
234 label='Normal fit')
235
236 axes[idx].set_xlabel('Sample Mean')
237 axes[idx].set_ylabel('Density')
238 axes[idx].set_title(f'Sample Size n={n}')
239 axes[idx].legend()
240 axes[idx].grid(True, alpha=0.3)
241
242 plt.tight_layout()
243 plt.savefig('/tmp/clt_demo.png', dpi=100)
244 print("\n[Plot saved to /tmp/clt_demo.png]")
245 plt.close()
246
247
248def law_of_large_numbers():
249 """Demonstrate Law of Large Numbers."""
250 print_section("5. Law of Large Numbers")
251
252 # Die rolling example
253 true_mean = 3.5
254 print(f"Rolling a fair die (theoretical mean = {true_mean})")
255
256 n_rolls = 10000
257 rolls = np.random.randint(1, 7, n_rolls)
258
259 # Cumulative means
260 cumulative_means = np.cumsum(rolls) / np.arange(1, n_rolls + 1)
261
262 # Check convergence at different points
263 checkpoints = [10, 100, 1000, 5000, 10000]
264 print("\nCumulative mean convergence:")
265 for n in checkpoints:
266 mean_n = cumulative_means[n-1]
267 error = abs(mean_n - true_mean)
268 print(f" n={n:5d}: mean={mean_n:.4f}, error={error:.4f}")
269
270 if HAS_PLT:
271 plt.figure(figsize=(12, 5))
272 plt.plot(cumulative_means, 'b-', alpha=0.7, linewidth=1)
273 plt.axhline(true_mean, color='red', linestyle='--', linewidth=2,
274 label=f'True mean = {true_mean}')
275 plt.xlabel('Number of rolls')
276 plt.ylabel('Cumulative mean')
277 plt.title('Law of Large Numbers: Die Rolling')
278 plt.legend()
279 plt.grid(True, alpha=0.3)
280 plt.savefig('/tmp/lln_demo.png', dpi=100)
281 print("\n[Plot saved to /tmp/lln_demo.png]")
282 plt.close()
283
284
285def main():
286 """Run all demonstrations."""
287 print("=" * 70)
288 print(" PROBABILITY REVIEW DEMONSTRATIONS")
289 print("=" * 70)
290
291 np.random.seed(42)
292
293 normal_distribution()
294 binomial_distribution()
295 poisson_exponential()
296 central_limit_theorem()
297 law_of_large_numbers()
298
299 print("\n" + "=" * 70)
300 print(" All demonstrations completed successfully!")
301 print("=" * 70)
302
303
304if __name__ == "__main__":
305 main()