13. ์‹ ๋ขฐ๊ตฌ๊ฐ„ (Confidence Intervals)

13. ์‹ ๋ขฐ๊ตฌ๊ฐ„ (Confidence Intervals)

์ด์ „: ํ‘œ๋ณธ๊ณผ ์ถ”์ • | ๋‹ค์Œ: ๊ฐ€์„ค๊ฒ€์ • ์‹ฌํ™”

๊ฐœ์š”

์‹ ๋ขฐ๊ตฌ๊ฐ„(Confidence Interval, CI)์€ ๋ชจ์ˆ˜๊ฐ€ ํฌํ•จ๋  ๊ฒƒ์œผ๋กœ ๊ธฐ๋Œ€๋˜๋Š” ๊ตฌ๊ฐ„์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์ ์ถ”์ •์ด "๋‹จ์ผ ๊ฐ’"์„ ์ œ๊ณตํ•œ๋‹ค๋ฉด, ๊ตฌ๊ฐ„์ถ”์ •์€ "๋ถˆํ™•์‹ค์„ฑ์˜ ๋ฒ”์œ„"๋ฅผ ํ•จ๊ป˜ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.


1. ๊ตฌ๊ฐ„์ถ”์ •์˜ ๊ธฐ๋ณธ ๊ฐœ๋…

1.1 ์‹ ๋ขฐ๊ตฌ๊ฐ„์˜ ์ •์˜

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

# Correct interpretation of a confidence interval.
# Meaning of a "95% confidence interval":
# - if we draw 100 samples with the same method and build an interval each time,
# - about 95 of those intervals will contain the true parameter.

# Understand it via simulation
np.random.seed(42)

# Population parameters (unknown in a real study)
mu = 100
sigma = 15
n = 30
confidence_level = 0.95

# Draw 100 samples and compute a confidence interval from each one
n_simulations = 100
intervals = []
contains_mu = 0

z_critical = stats.norm.ppf((1 + confidence_level) / 2)

for i in range(n_simulations):
    sample = np.random.normal(mu, sigma, n)
    x_bar = sample.mean()
    se = sigma / np.sqrt(n)  # assume the population std dev is known

    lower = x_bar - z_critical * se
    upper = x_bar + z_critical * se

    intervals.append((lower, upper))
    if lower <= mu <= upper:
        contains_mu += 1

print(f"100๊ฐœ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„ ์ค‘ ๋ชจํ‰๊ท ์„ ํฌํ•จํ•˜๋Š” ๊ตฌ๊ฐ„: {contains_mu}๊ฐœ")

# Visualization (first 20 intervals; red = interval missed the true mean)
fig, ax = plt.subplots(figsize=(12, 8))

for i in range(20):
    lower, upper = intervals[i]
    color = 'blue' if lower <= mu <= upper else 'red'
    ax.plot([lower, upper], [i, i], color=color, linewidth=2)
    ax.scatter([(lower + upper)/2], [i], color=color, s=30)

ax.axvline(mu, color='green', linestyle='--', linewidth=2, label=f'ฮผ = {mu}')
ax.set_xlabel('๊ฐ’')
ax.set_ylabel('์‹œ๋ฎฌ๋ ˆ์ด์…˜ ๋ฒˆํ˜ธ')
ax.set_title('์‹ ๋ขฐ๊ตฌ๊ฐ„์˜ ์˜๋ฏธ: 20๊ฐœ ์ค‘ ๋ชจํ‰๊ท ์„ ํฌํ•จํ•˜์ง€ ์•Š๋Š” ๊ตฌ๊ฐ„(๋นจ๊ฐ•)')
ax.legend()
ax.set_xlim(90, 110)
plt.show()

1.2 ์‹ ๋ขฐ์ˆ˜์ค€๊ณผ ๊ตฌ๊ฐ„ ํญ์˜ ๊ด€๊ณ„

# Higher confidence level -> wider interval
# Larger sample size   -> narrower interval

np.random.seed(42)
sample = np.random.normal(100, 15, 50)
x_bar = sample.mean()
s = sample.std(ddof=1)
n = len(sample)
se = s / np.sqrt(n)

confidence_levels = [0.80, 0.90, 0.95, 0.99]

print("์‹ ๋ขฐ์ˆ˜์ค€์— ๋”ฐ๋ฅธ ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print("-" * 60)
print(f"ํ‘œ๋ณธํ‰๊ท  = {x_bar:.2f}, ํ‘œ์ค€์˜ค์ฐจ = {se:.2f}")
print("-" * 60)

for cl in confidence_levels:
    t_critical = stats.t.ppf((1 + cl) / 2, df=n-1)
    margin = t_critical * se
    lower = x_bar - margin
    upper = x_bar + margin
    width = upper - lower
    print(f"{int(cl*100)}% CI: [{lower:.2f}, {upper:.2f}], ํญ = {width:.2f}")

# Visualization: one horizontal bar per confidence level
fig, ax = plt.subplots(figsize=(10, 5))

colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(confidence_levels)))
for i, (cl, color) in enumerate(zip(confidence_levels, colors)):
    t_critical = stats.t.ppf((1 + cl) / 2, df=n-1)
    margin = t_critical * se
    ax.barh(i, 2*margin, left=x_bar-margin, height=0.6, color=color,
            label=f'{int(cl*100)}% CI', alpha=0.7)

ax.axvline(x_bar, color='red', linestyle='-', linewidth=2, label='ํ‘œ๋ณธํ‰๊ท ')
ax.set_yticks(range(len(confidence_levels)))
ax.set_yticklabels([f'{int(cl*100)}%' for cl in confidence_levels])
ax.set_xlabel('๊ฐ’')
ax.set_ylabel('์‹ ๋ขฐ์ˆ˜์ค€')
ax.set_title('์‹ ๋ขฐ์ˆ˜์ค€๊ณผ ๊ตฌ๊ฐ„ ํญ์˜ ๊ด€๊ณ„')
ax.legend(loc='upper right')
plt.show()

2. ๋ชจํ‰๊ท ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

2.1 ๋ชจ๋ถ„์‚ฐ์ด ์•Œ๋ ค์ง„ ๊ฒฝ์šฐ (Z-๊ตฌ๊ฐ„)

def ci_mean_z(sample, sigma, confidence=0.95):
    """Confidence interval for the population mean with known sigma (Z-interval).

    Returns (lower, upper, margin_of_error).
    """
    size = len(sample)
    center = np.mean(sample)
    std_err = sigma / np.sqrt(size)

    # Two-sided critical value from the standard normal distribution
    z_value = stats.norm.ppf((1 + confidence) / 2)
    half_width = z_value * std_err

    return center - half_width, center + half_width, half_width

# Example: monitoring a manufacturing process
# Historical data tells us sigma = 2.5
np.random.seed(42)
sigma_known = 2.5
sample = np.random.normal(50, sigma_known, 40)

lower, upper, margin = ci_mean_z(sample, sigma_known, 0.95)

print("๋ชจ๋ถ„์‚ฐ์ด ์•Œ๋ ค์ง„ ๊ฒฝ์šฐ (Z-๊ตฌ๊ฐ„):")
print(f"ํ‘œ๋ณธํ‰๊ท : {sample.mean():.3f}")
print(f"95% ์‹ ๋ขฐ๊ตฌ๊ฐ„: [{lower:.3f}, {upper:.3f}]")
print(f"์˜ค์ฐจ ํ•œ๊ณ„: ยฑ{margin:.3f}")

2.2 ๋ชจ๋ถ„์‚ฐ์ด ์•Œ๋ ค์ง€์ง€ ์•Š์€ ๊ฒฝ์šฐ (t-๊ตฌ๊ฐ„)

def ci_mean_t(sample, confidence=0.95):
    """Confidence interval for the population mean with unknown sigma (t-interval).

    Returns (lower, upper, margin_of_error).
    """
    size = len(sample)
    center = np.mean(sample)
    std_err = np.std(sample, ddof=1) / np.sqrt(size)

    # Critical value from the t-distribution with n-1 degrees of freedom
    t_value = stats.t.ppf((1 + confidence) / 2, df=size - 1)
    half_width = t_value * std_err

    return center - half_width, center + half_width, half_width

# Example
np.random.seed(42)
sample = np.random.normal(50, 2.5, 40)

lower_t, upper_t, margin_t = ci_mean_t(sample, 0.95)

print("๋ชจ๋ถ„์‚ฐ์ด ๋ฏธ์ง€์ธ ๊ฒฝ์šฐ (t-๊ตฌ๊ฐ„):")
print(f"ํ‘œ๋ณธํ‰๊ท : {sample.mean():.3f}")
print(f"ํ‘œ๋ณธํ‘œ์ค€ํŽธ์ฐจ: {sample.std(ddof=1):.3f}")
print(f"95% ์‹ ๋ขฐ๊ตฌ๊ฐ„: [{lower_t:.3f}, {upper_t:.3f}]")
print(f"์˜ค์ฐจ ํ•œ๊ณ„: ยฑ{margin_t:.3f}")

# Cross-check with scipy.stats
sem = stats.sem(sample)  # standard error of the mean
ci_scipy = stats.t.interval(0.95, df=len(sample)-1, loc=sample.mean(), scale=sem)
print(f"\nscipy ๊ฒฐ๊ณผ: [{ci_scipy[0]:.3f}, {ci_scipy[1]:.3f}]")

2.3 t-๋ถ„ํฌ ์ดํ•ด

# t-๋ถ„ํฌ: ํ‘œ๋ณธ ํฌ๊ธฐ๊ฐ€ ์ž‘์„ ๋•Œ ์‚ฌ์šฉ
# t = (Xฬ„ - ฮผ) / (S/โˆšn) ~ t(n-1)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# t-๋ถ„ํฌ vs ์ •๊ทœ๋ถ„ํฌ
x = np.linspace(-4, 4, 200)
axes[0].plot(x, stats.norm.pdf(x), 'b-', lw=2, label='N(0,1)')

dfs = [3, 10, 30]
colors = ['red', 'green', 'purple']
for df, color in zip(dfs, colors):
    axes[0].plot(x, stats.t.pdf(x, df), linestyle='--', lw=2,
                 color=color, label=f't(df={df})')

axes[0].set_xlabel('x')
axes[0].set_ylabel('๋ฐ€๋„')
axes[0].set_title('t-๋ถ„ํฌ vs ํ‘œ์ค€์ •๊ทœ๋ถ„ํฌ')
axes[0].legend()
axes[0].grid(alpha=0.3)

# ์ž์œ ๋„์— ๋”ฐ๋ฅธ ์ž„๊ณ„๊ฐ’ ๋ณ€ํ™”
dfs_range = np.arange(2, 101)
t_criticals = [stats.t.ppf(0.975, df) for df in dfs_range]
z_critical = stats.norm.ppf(0.975)

axes[1].plot(dfs_range, t_criticals, 'b-', lw=2, label='t(0.975, df)')
axes[1].axhline(z_critical, color='r', linestyle='--', lw=2,
                label=f'z(0.975) = {z_critical:.3f}')
axes[1].set_xlabel('์ž์œ ๋„ (df)')
axes[1].set_ylabel('์ž„๊ณ„๊ฐ’')
axes[1].set_title('์ž์œ ๋„์— ๋”ฐ๋ฅธ 95% ์ž„๊ณ„๊ฐ’')
axes[1].legend()
axes[1].grid(alpha=0.3)

plt.tight_layout()
plt.show()

# ํ‘œ๋กœ ์ •๋ฆฌ
print("์ž์œ ๋„์— ๋”ฐ๋ฅธ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„ ์ž„๊ณ„๊ฐ’:")
print("-" * 30)
for df in [5, 10, 20, 30, 50, 100, np.inf]:
    if df == np.inf:
        t_val = stats.norm.ppf(0.975)
        print(f"df = โˆž (์ •๊ทœ): t = {t_val:.4f}")
    else:
        t_val = stats.t.ppf(0.975, df)
        print(f"df = {df:3.0f}: t = {t_val:.4f}")

3. ๋ชจ๋น„์œจ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

3.1 ์ •๊ทœ๊ทผ์‚ฌ๋ฅผ ์ด์šฉํ•œ ์‹ ๋ขฐ๊ตฌ๊ฐ„

def ci_proportion(successes, n, confidence=0.95):
    """Normal-approximation (Wald) confidence interval for a population proportion."""
    phat = successes / n
    std_err = np.sqrt(phat * (1 - phat) / n)

    # Two-sided z critical value
    z_value = stats.norm.ppf((1 + confidence) / 2)
    half_width = z_value * std_err

    return phat - half_width, phat + half_width

# Example: opinion poll
# 420 of 1000 respondents in favor
successes = 420
n = 1000

lower, upper = ci_proportion(successes, n, 0.95)

print("๋ชจ๋น„์œจ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„ (์ •๊ทœ๊ทผ์‚ฌ):")
print(f"ํ‘œ๋ณธ๋น„์œจ: pฬ‚ = {successes/n:.3f}")
print(f"95% CI: [{lower:.3f}, {upper:.3f}]")
print(f"ํ•ด์„: ๋ชจ๋น„์œจ์€ {lower*100:.1f}%์—์„œ {upper*100:.1f}% ์‚ฌ์ด์ผ ๊ฒƒ์œผ๋กœ ์ถ”์ •")

# Check the normal-approximation conditions: np >= 10 and n(1-p) >= 10
p_hat = successes / n
print(f"\n์ •๊ทœ๊ทผ์‚ฌ ์กฐ๊ฑด ํ™•์ธ:")
print(f"np = {n * p_hat:.1f} โ‰ฅ 10? {n * p_hat >= 10}")
print(f"n(1-p) = {n * (1-p_hat):.1f} โ‰ฅ 10? {n * (1-p_hat) >= 10}")

3.2 Wilson ์‹ ๋ขฐ๊ตฌ๊ฐ„ (๊ฐœ์„ ๋œ ๋ฐฉ๋ฒ•)

def ci_proportion_wilson(successes, n, confidence=0.95):
    """Wilson score confidence interval for a proportion (more accurate than Wald)."""
    phat = successes / n
    z_value = stats.norm.ppf((1 + confidence) / 2)
    z2 = z_value ** 2

    # Wilson score interval: shifted center plus adjusted half-width
    denom = 1 + z2 / n
    midpoint = (phat + z2 / (2 * n)) / denom
    half_width = (z_value / denom) * np.sqrt(phat * (1 - phat) / n + z2 / (4 * n ** 2))

    return midpoint - half_width, midpoint + half_width

# ์ž‘์€ ํ‘œ๋ณธ์—์„œ ๋น„๊ต
successes = 8
n = 20
p_hat = successes / n

lower_normal, upper_normal = ci_proportion(successes, n, 0.95)
lower_wilson, upper_wilson = ci_proportion_wilson(successes, n, 0.95)

print(f"ํ‘œ๋ณธ๋น„์œจ: pฬ‚ = {p_hat:.3f} (n={n})")
print(f"\n์ •๊ทœ๊ทผ์‚ฌ CI: [{lower_normal:.3f}, {upper_normal:.3f}]")
print(f"Wilson CI:   [{lower_wilson:.3f}, {upper_wilson:.3f}]")

# scipy์˜ proportion_confint
from statsmodels.stats.proportion import proportion_confint

ci_wilson = proportion_confint(successes, n, alpha=0.05, method='wilson')
ci_normal = proportion_confint(successes, n, alpha=0.05, method='normal')

print(f"\nstatsmodels ๊ฒฐ๊ณผ:")
print(f"  Normal: [{ci_normal[0]:.3f}, {ci_normal[1]:.3f}]")
print(f"  Wilson: [{ci_wilson[0]:.3f}, {ci_wilson[1]:.3f}]")

3.3 ๋น„์œจ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„ ๋น„๊ต

from statsmodels.stats.proportion import proportion_confint

# Compare several interval-construction methods for a proportion
successes = 15
n = 50
p_hat = successes / n

methods = ['normal', 'wilson', 'jeffreys', 'agresti_coull', 'beta']

print(f"ํ‘œ๋ณธ๋น„์œจ = {p_hat:.3f} (successes={successes}, n={n})")
print("\n๋น„์œจ ์‹ ๋ขฐ๊ตฌ๊ฐ„ ๋ฐฉ๋ฒ• ๋น„๊ต:")
print("-" * 50)

for method in methods:
    lower, upper = proportion_confint(successes, n, alpha=0.05, method=method)
    width = upper - lower
    print(f"{method:<15}: [{lower:.4f}, {upper:.4f}], ํญ={width:.4f}")

# Visualization: one bar per method, red dot marks the sample proportion
fig, ax = plt.subplots(figsize=(10, 6))

y_positions = range(len(methods))
for i, method in enumerate(methods):
    lower, upper = proportion_confint(successes, n, alpha=0.05, method=method)
    ax.barh(i, upper-lower, left=lower, height=0.6, alpha=0.7)
    ax.scatter([p_hat], [i], color='red', s=50, zorder=5)

ax.axvline(p_hat, color='red', linestyle='--', alpha=0.5, label='ํ‘œ๋ณธ๋น„์œจ')
ax.set_yticks(y_positions)
ax.set_yticklabels(methods)
ax.set_xlabel('๋น„์œจ')
ax.set_title('๋น„์œจ ์‹ ๋ขฐ๊ตฌ๊ฐ„ ๋ฐฉ๋ฒ• ๋น„๊ต')
ax.legend()
plt.tight_layout()
plt.show()

4. ๋ชจ๋ถ„์‚ฐ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

4.1 ์นด์ด์ œ๊ณฑ ๋ถ„ํฌ๋ฅผ ์ด์šฉํ•œ ์‹ ๋ขฐ๊ตฌ๊ฐ„

def ci_variance(sample, confidence=0.95):
    """Chi-square confidence interval for the population variance.

    Assumes the sample is drawn from a normal population.
    Returns (var_lower, var_upper).
    """
    size = len(sample)
    s2 = np.var(sample, ddof=1)

    tail = (1 - confidence) / 2
    chi2_low = stats.chi2.ppf(tail, df=size - 1)
    chi2_high = stats.chi2.ppf(1 - tail, df=size - 1)

    # Interval: ((n-1)s^2 / chi2_high, (n-1)s^2 / chi2_low) — note the swap,
    # since dividing by the larger quantile gives the smaller bound.
    scaled = (size - 1) * s2
    return scaled / chi2_high, scaled / chi2_low

def ci_std(sample, confidence=0.95):
    """Confidence interval for the population standard deviation.

    Square roots of the endpoints of the variance interval.
    """
    bounds = ci_variance(sample, confidence)
    return np.sqrt(bounds[0]), np.sqrt(bounds[1])

# Example: estimating variance in quality control
np.random.seed(42)
sample = np.random.normal(100, 5, 30)  # true sigma = 5

s_squared = np.var(sample, ddof=1)
s = np.std(sample, ddof=1)

var_lower, var_upper = ci_variance(sample, 0.95)
std_lower, std_upper = ci_std(sample, 0.95)

print("๋ชจ๋ถ„์‚ฐ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"ํ‘œ๋ณธ๋ถ„์‚ฐ: sยฒ = {s_squared:.3f}")
print(f"๋ถ„์‚ฐ CI: [{var_lower:.3f}, {var_upper:.3f}]")

print(f"\n๋ชจํ‘œ์ค€ํŽธ์ฐจ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"ํ‘œ๋ณธํ‘œ์ค€ํŽธ์ฐจ: s = {s:.3f}")
print(f"ํ‘œ์ค€ํŽธ์ฐจ CI: [{std_lower:.3f}, {std_upper:.3f}]")
print(f"์‹ค์ œ ฯƒ = 5๊ฐ€ ๊ตฌ๊ฐ„์— ํฌํ•จ๋˜๋Š”๊ฐ€? {std_lower <= 5 <= std_upper}")

4.2 ์นด์ด์ œ๊ณฑ ๋ถ„ํฌ์˜ ๋น„๋Œ€์นญ์„ฑ

# ๋ถ„์‚ฐ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„์ด ํ‰๊ท  ์ค‘์‹ฌ์ด ์•„๋‹Œ ์ด์œ 

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# ์นด์ด์ œ๊ณฑ ๋ถ„ํฌ
df = 20
x = np.linspace(0, 50, 200)
chi2_pdf = stats.chi2.pdf(x, df)

# ์ž„๊ณ„๊ฐ’
alpha = 0.05
chi2_lower = stats.chi2.ppf(alpha/2, df)
chi2_upper = stats.chi2.ppf(1 - alpha/2, df)

axes[0].plot(x, chi2_pdf, 'b-', lw=2)
axes[0].fill_between(x, chi2_pdf, where=(x >= chi2_lower) & (x <= chi2_upper),
                      alpha=0.3, color='blue')
axes[0].axvline(chi2_lower, color='r', linestyle='--', label=f'ฯ‡ยฒ_L = {chi2_lower:.2f}')
axes[0].axvline(chi2_upper, color='r', linestyle='--', label=f'ฯ‡ยฒ_U = {chi2_upper:.2f}')
axes[0].axvline(df, color='g', linestyle=':', label=f'E[ฯ‡ยฒ] = {df}')
axes[0].set_xlabel('ฯ‡ยฒ')
axes[0].set_ylabel('๋ฐ€๋„')
axes[0].set_title(f'์นด์ด์ œ๊ณฑ ๋ถ„ํฌ (df={df})')
axes[0].legend()

# ๋ถ„์‚ฐ ์‹ ๋ขฐ๊ตฌ๊ฐ„์˜ ์‹œ๋ฎฌ๋ ˆ์ด์…˜
np.random.seed(42)
true_variance = 25  # ฯƒยฒ = 25
n = 21  # df = 20
n_simulations = 1000

var_estimates = []
ci_lowers = []
ci_uppers = []

for _ in range(n_simulations):
    sample = np.random.normal(0, np.sqrt(true_variance), n)
    s2 = np.var(sample, ddof=1)
    var_estimates.append(s2)

    lower = (n-1) * s2 / chi2_upper
    upper = (n-1) * s2 / chi2_lower
    ci_lowers.append(lower)
    ci_uppers.append(upper)

axes[1].hist(var_estimates, bins=50, density=True, alpha=0.7, label='ํ‘œ๋ณธ๋ถ„์‚ฐ ๋ถ„ํฌ')
axes[1].axvline(true_variance, color='r', linestyle='--', lw=2,
                label=f'ฯƒยฒ = {true_variance}')
axes[1].axvline(np.mean(var_estimates), color='g', linestyle=':',
                label=f'E[sยฒ] = {np.mean(var_estimates):.2f}')
axes[1].set_xlabel('ํ‘œ๋ณธ๋ถ„์‚ฐ')
axes[1].set_ylabel('๋ฐ€๋„')
axes[1].set_title('ํ‘œ๋ณธ๋ถ„์‚ฐ์˜ ๋ถ„ํฌ')
axes[1].legend()

plt.tight_layout()
plt.show()

# ์ปค๋ฒ„๋ฆฌ์ง€ ํ™•์ธ
coverage = np.mean([(l <= true_variance <= u)
                    for l, u in zip(ci_lowers, ci_uppers)])
print(f"95% ์‹ ๋ขฐ๊ตฌ๊ฐ„์˜ ์‹ค์ œ ์ปค๋ฒ„๋ฆฌ์ง€: {coverage*100:.1f}%")

5. ๋‘ ์ง‘๋‹จ ๋น„๊ต๋ฅผ ์œ„ํ•œ ์‹ ๋ขฐ๊ตฌ๊ฐ„

5.1 ๋‘ ๋ชจํ‰๊ท  ์ฐจ์ด์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

def ci_two_means_independent(sample1, sample2, confidence=0.95, equal_var=True):
    """Confidence interval for the difference of two independent population means.

    Returns (lower, upper, point_difference, degrees_of_freedom).
    """
    n1, n2 = len(sample1), len(sample2)
    mean1, mean2 = sample1.mean(), sample2.mean()
    sd1, sd2 = sample1.std(ddof=1), sample2.std(ddof=1)

    if equal_var:
        # Pooled-variance standard error; df = n1 + n2 - 2
        pooled = ((n1-1)*sd1**2 + (n2-1)*sd2**2) / (n1 + n2 - 2)
        se = np.sqrt(pooled * (1/n1 + 1/n2))
        df = n1 + n2 - 2
    else:
        # Welch's approach: no equal-variance assumption
        term1, term2 = sd1**2 / n1, sd2**2 / n2
        se = np.sqrt(term1 + term2)
        # Welch-Satterthwaite degrees of freedom
        df = (term1 + term2)**2 / (term1**2/(n1-1) + term2**2/(n2-1))

    half_width = stats.t.ppf((1 + confidence) / 2, df) * se
    point = mean1 - mean2

    return point - half_width, point + half_width, point, df

# Example: A/B test results
np.random.seed(42)
group_A = np.random.normal(105, 15, 50)  # treatment group
group_B = np.random.normal(100, 15, 50)  # control group

# With the equal-variance assumption
lower_eq, upper_eq, diff, df = ci_two_means_independent(group_A, group_B,
                                                        equal_var=True)
print("๋‘ ๋ชจํ‰๊ท  ์ฐจ์ด์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"๊ทธ๋ฃน A ํ‰๊ท : {group_A.mean():.2f}, ๊ทธ๋ฃน B ํ‰๊ท : {group_B.mean():.2f}")
print(f"์ฐจ์ด (A - B): {diff:.2f}")
print(f"๋“ฑ๋ถ„์‚ฐ ๊ฐ€์ • CI (df={df:.0f}): [{lower_eq:.2f}, {upper_eq:.2f}]")

# Without the equal-variance assumption (Welch)
lower_welch, upper_welch, diff, df_welch = ci_two_means_independent(group_A, group_B,
                                                                     equal_var=False)
print(f"Welch CI (df={df_welch:.1f}): [{lower_welch:.2f}, {upper_welch:.2f}]")

# Cross-check with scipy
from scipy.stats import ttest_ind
t_stat, p_val = ttest_ind(group_A, group_B, equal_var=False)
print(f"\nscipy t-๊ฒ€์ • ํ†ต๊ณ„๋Ÿ‰: {t_stat:.3f}, p-value: {p_val:.4f}")

5.2 ๋Œ€์‘ํ‘œ๋ณธ ํ‰๊ท  ์ฐจ์ด์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

def ci_paired_difference(before, after, confidence=0.95):
    """Confidence interval for the mean of paired differences (after - before).

    Returns (lower, upper, mean_difference).
    """
    deltas = after - before
    count = len(deltas)
    avg_delta = deltas.mean()
    std_err = deltas.std(ddof=1) / np.sqrt(count)

    # t-interval on the single sample of differences, df = n - 1
    t_value = stats.t.ppf((1 + confidence) / 2, df=count - 1)
    half_width = t_value * std_err

    return avg_delta - half_width, avg_delta + half_width, avg_delta

# ์˜ˆ์‹œ: ๋‹ค์ด์–ดํŠธ ํ”„๋กœ๊ทธ๋žจ ์ „ํ›„ ์ฒด์ค‘
np.random.seed(42)
weight_before = np.random.normal(75, 10, 30)
weight_after = weight_before - np.random.normal(3, 2, 30)  # ํ‰๊ท  3kg ๊ฐ์†Œ

lower, upper, mean_diff = ci_paired_difference(weight_before, weight_after, 0.95)

print("๋Œ€์‘ํ‘œ๋ณธ ํ‰๊ท  ์ฐจ์ด์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"์ „: {weight_before.mean():.2f} kg, ํ›„: {weight_after.mean():.2f} kg")
print(f"ํ‰๊ท  ๋ณ€ํ™”: {mean_diff:.2f} kg")
print(f"95% CI: [{lower:.2f}, {upper:.2f}] kg")

if upper < 0:
    print("โ†’ ์‹ ๋ขฐ๊ตฌ๊ฐ„์ด 0์„ ํฌํ•จํ•˜์ง€ ์•Š์œผ๋ฏ€๋กœ, ์ฒด์ค‘ ๊ฐ์†Œ๊ฐ€ ์œ ์˜ํ•จ")

5.3 ๋‘ ๋ชจ๋น„์œจ ์ฐจ์ด์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

def ci_two_proportions(successes1, n1, successes2, n2, confidence=0.95):
    """Normal-approximation CI for the difference of two population proportions.

    Returns (lower, upper, p1_hat - p2_hat).
    """
    p1_hat = successes1 / n1
    p2_hat = successes2 / n2
    point = p1_hat - p2_hat

    # Unpooled standard error of the difference
    std_err = np.sqrt(p1_hat*(1-p1_hat)/n1 + p2_hat*(1-p2_hat)/n2)
    half_width = stats.norm.ppf((1 + confidence) / 2) * std_err

    return point - half_width, point + half_width, point

# ์˜ˆ์‹œ: ๋‘ ๊ด‘๊ณ ์˜ ํด๋ฆญ๋ฅ  ๋น„๊ต
# ๊ด‘๊ณ  A: 1000๋ช… ๋…ธ์ถœ, 150๋ช… ํด๋ฆญ
# ๊ด‘๊ณ  B: 1200๋ช… ๋…ธ์ถœ, 132๋ช… ํด๋ฆญ

lower, upper, diff = ci_two_proportions(150, 1000, 132, 1200, 0.95)

print("๋‘ ๋ชจ๋น„์œจ ์ฐจ์ด์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"๊ด‘๊ณ  A ํด๋ฆญ๋ฅ : {150/1000:.3f}")
print(f"๊ด‘๊ณ  B ํด๋ฆญ๋ฅ : {132/1200:.3f}")
print(f"์ฐจ์ด: {diff:.4f} ({diff*100:.2f}%p)")
print(f"95% CI: [{lower:.4f}, {upper:.4f}]")

if lower > 0:
    print("โ†’ ๊ด‘๊ณ  A์˜ ํด๋ฆญ๋ฅ ์ด ์œ ์˜ํ•˜๊ฒŒ ๋†’์Œ")
elif upper < 0:
    print("โ†’ ๊ด‘๊ณ  B์˜ ํด๋ฆญ๋ฅ ์ด ์œ ์˜ํ•˜๊ฒŒ ๋†’์Œ")
else:
    print("โ†’ ๋‘ ๊ด‘๊ณ ์˜ ํด๋ฆญ๋ฅ ์— ์œ ์˜ํ•œ ์ฐจ์ด ์—†์Œ")

6. ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ์‹ ๋ขฐ๊ตฌ๊ฐ„

6.1 ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ๋ฐฉ๋ฒ• ์†Œ๊ฐœ

def bootstrap_ci(data, statistic_func, confidence=0.95, n_bootstrap=10000):
    """Percentile bootstrap confidence interval for an arbitrary statistic.

    Parameters
    ----------
    data : array-like
        1-D sample, or a 2-D array whose ROWS are observations (rows are
        resampled together, e.g. paired (x, y) columns for a correlation).
    statistic_func : callable
        Maps a resampled array to a scalar statistic (e.g. np.mean).
    confidence : float
        Confidence level (default 0.95).
    n_bootstrap : int
        Number of bootstrap resamples.

    Returns
    -------
    (lower, upper, bootstrap_statistics)

    Notes
    -----
    Resampling is done via row indices rather than ``np.random.choice(data, ...)``
    because ``np.random.choice`` requires 1-D input and would raise on 2-D data
    (as used for the correlation example). For 1-D input this draws the exact
    same index stream as before, so results are unchanged.
    """
    np.random.seed(42)  # fixed seed for reproducible tutorial output
    data = np.asarray(data)
    n = len(data)
    bootstrap_statistics = []

    for _ in range(n_bootstrap):
        # Sample row indices with replacement, then take those rows
        idx = np.random.choice(n, size=n, replace=True)
        bootstrap_statistics.append(statistic_func(data[idx]))

    # Percentile confidence interval
    alpha = 1 - confidence
    lower = np.percentile(bootstrap_statistics, 100 * alpha / 2)
    upper = np.percentile(bootstrap_statistics, 100 * (1 - alpha / 2))

    return lower, upper, np.array(bootstrap_statistics)

# ์˜ˆ์‹œ: ํ‰๊ท ์˜ ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ์‹ ๋ขฐ๊ตฌ๊ฐ„
np.random.seed(42)
sample = np.random.exponential(scale=5, size=50)  # ๋น„๋Œ€์นญ ๋ถ„ํฌ

lower_boot, upper_boot, boot_means = bootstrap_ci(sample, np.mean, 0.95)

# ๊ธฐ์กด t-๋ถ„ํฌ ๋ฐฉ๋ฒ•๊ณผ ๋น„๊ต
lower_t, upper_t, _ = ci_mean_t(sample, 0.95)

print("ํ‰๊ท ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„ ๋น„๊ต:")
print(f"ํ‘œ๋ณธํ‰๊ท : {sample.mean():.3f}")
print(f"t-๋ถ„ํฌ:   [{lower_t:.3f}, {upper_t:.3f}]")
print(f"๋ถ€ํŠธ์ŠคํŠธ๋žฉ: [{lower_boot:.3f}, {upper_boot:.3f}]")

# ์‹œ๊ฐํ™”
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# ์›๋ณธ ๋ฐ์ดํ„ฐ
axes[0].hist(sample, bins=20, density=True, alpha=0.7, edgecolor='black')
axes[0].axvline(sample.mean(), color='r', linestyle='--', linewidth=2, label='ํ‘œ๋ณธํ‰๊ท ')
axes[0].set_xlabel('๊ฐ’')
axes[0].set_ylabel('๋ฐ€๋„')
axes[0].set_title('์›๋ณธ ๋ฐ์ดํ„ฐ (์ง€์ˆ˜๋ถ„ํฌ)')
axes[0].legend()

# ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ๋ถ„ํฌ
axes[1].hist(boot_means, bins=50, density=True, alpha=0.7, edgecolor='black')
axes[1].axvline(lower_boot, color='r', linestyle='--', label=f'95% CI: [{lower_boot:.2f}, {upper_boot:.2f}]')
axes[1].axvline(upper_boot, color='r', linestyle='--')
axes[1].axvline(sample.mean(), color='g', linestyle='-', linewidth=2, label='ํ‘œ๋ณธํ‰๊ท ')
axes[1].set_xlabel('๋ถ€ํŠธ์ŠคํŠธ๋žฉ ํ‘œ๋ณธํ‰๊ท ')
axes[1].set_ylabel('๋ฐ€๋„')
axes[1].set_title('๋ถ€ํŠธ์ŠคํŠธ๋žฉ ๋ถ„ํฌ (10,000ํšŒ)')
axes[1].legend()

plt.tight_layout()
plt.show()

6.2 ๋‹ค์–‘ํ•œ ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ๋ฐฉ๋ฒ•

def bootstrap_ci_bca(data, statistic_func, confidence=0.95, n_bootstrap=10000):
    """BCa (bias-corrected and accelerated) bootstrap confidence interval.

    Returns (lower, upper).
    """
    from scipy.stats import norm

    np.random.seed(42)
    n = len(data)
    theta_hat = statistic_func(data)

    # Bootstrap replications of the statistic
    boot_stats = np.array([
        statistic_func(np.random.choice(data, size=n, replace=True))
        for _ in range(n_bootstrap)
    ])

    # Bias-correction factor: how far the bootstrap distribution sits from theta_hat
    z0 = norm.ppf(np.mean(boot_stats < theta_hat))

    # Acceleration factor from jackknife (leave-one-out) estimates
    jack = np.array([statistic_func(np.delete(data, i)) for i in range(n)])
    dev = jack.mean() - jack
    a = np.sum(dev**3) / \
        (6 * (np.sum(dev**2))**1.5 + 1e-10)

    # BCa-adjusted percentile levels
    alpha = 1 - confidence
    z_lo = norm.ppf(alpha / 2)
    z_hi = norm.ppf(1 - alpha / 2)
    adj_lo = norm.cdf(z0 + (z0 + z_lo) / (1 - a * (z0 + z_lo)))
    adj_hi = norm.cdf(z0 + (z0 + z_hi) / (1 - a * (z0 + z_hi)))

    return (np.percentile(boot_stats, 100 * adj_lo),
            np.percentile(boot_stats, 100 * adj_hi))

# Comparison of the two bootstrap methods
np.random.seed(42)
sample = np.random.exponential(scale=5, size=30)

# Percentile method
lower_pct, upper_pct, _ = bootstrap_ci(sample, np.mean, 0.95)

# BCa method
lower_bca, upper_bca = bootstrap_ci_bca(sample, np.mean, 0.95)

print("๋ถ€ํŠธ์ŠคํŠธ๋žฉ ๋ฐฉ๋ฒ• ๋น„๊ต:")
print(f"ํ‘œ๋ณธํ‰๊ท : {sample.mean():.3f}")
print(f"๋ฐฑ๋ถ„์œ„์ˆ˜:  [{lower_pct:.3f}, {upper_pct:.3f}]")
print(f"BCa:       [{lower_bca:.3f}, {upper_bca:.3f}]")

6.3 ์ค‘์•™๊ฐ’์˜ ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ์‹ ๋ขฐ๊ตฌ๊ฐ„

# Bootstrap is useful for statistics (like the median) whose sampling
# distribution has no simple closed form

np.random.seed(42)
sample = np.random.lognormal(mean=3, sigma=0.5, size=100)

lower_median, upper_median, boot_medians = bootstrap_ci(sample, np.median, 0.95)

print("์ค‘์•™๊ฐ’์˜ 95% ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"ํ‘œ๋ณธ์ค‘์•™๊ฐ’: {np.median(sample):.3f}")
print(f"95% CI: [{lower_median:.3f}, {upper_median:.3f}]")

# Bootstrap CI for a correlation coefficient: stack (x, y) as columns so
# each row is one paired observation
np.random.seed(42)
x = np.random.normal(0, 1, 50)
y = 0.7 * x + np.random.normal(0, 0.5, 50)
data_combined = np.column_stack([x, y])

def correlation(data):
    # Pearson correlation between the first two columns of a 2-D array
    first, second = data[:, 0], data[:, 1]
    return np.corrcoef(first, second)[0, 1]

# Bootstrap the correlation by resampling rows of the combined (x, y) array
lower_corr, upper_corr, boot_corrs = bootstrap_ci(data_combined, correlation, 0.95)

print(f"\n์ƒ๊ด€๊ณ„์ˆ˜์˜ 95% ๋ถ€ํŠธ์ŠคํŠธ๋žฉ ์‹ ๋ขฐ๊ตฌ๊ฐ„:")
print(f"ํ‘œ๋ณธ์ƒ๊ด€๊ณ„์ˆ˜: {np.corrcoef(x, y)[0, 1]:.3f}")
print(f"95% CI: [{lower_corr:.3f}, {upper_corr:.3f}]")

7. scipy๋ฅผ ์ด์šฉํ•œ ์‹ ๋ขฐ๊ตฌ๊ฐ„ ๊ณ„์‚ฐ

7.1 ๊ธฐ๋ณธ ์‹ ๋ขฐ๊ตฌ๊ฐ„ ํ•จ์ˆ˜

# scipy.stats์˜ interval ๋ฉ”์„œ๋“œ ํ™œ์šฉ

# ์ •๊ทœ๋ถ„ํฌ์—์„œ ๋ชจํ‰๊ท  ์‹ ๋ขฐ๊ตฌ๊ฐ„
np.random.seed(42)
sample = np.random.normal(100, 15, 50)

# t-๋ถ„ํฌ ์ด์šฉ
mean = sample.mean()
sem = stats.sem(sample)  # ํ‘œ์ค€์˜ค์ฐจ

ci_95 = stats.t.interval(confidence=0.95, df=len(sample)-1, loc=mean, scale=sem)
ci_99 = stats.t.interval(confidence=0.99, df=len(sample)-1, loc=mean, scale=sem)

print("scipy.stats.t.interval ์‚ฌ์šฉ:")
print(f"ํ‘œ๋ณธํ‰๊ท : {mean:.2f}")
print(f"95% CI: [{ci_95[0]:.2f}, {ci_95[1]:.2f}]")
print(f"99% CI: [{ci_99[0]:.2f}, {ci_99[1]:.2f}]")

# bootstrap ๋ฉ”์„œ๋“œ (scipy >= 1.9)
from scipy.stats import bootstrap

np.random.seed(42)
sample_tuple = (sample,)  # tuple๋กœ ์ „๋‹ฌํ•ด์•ผ ํ•จ
result = bootstrap(sample_tuple, np.mean, confidence_level=0.95, n_resamples=9999)

print(f"\nscipy.stats.bootstrap ์‚ฌ์šฉ:")
print(f"95% CI: [{result.confidence_interval.low:.2f}, {result.confidence_interval.high:.2f}]")

7.2 statsmodels ํ™œ์šฉ

import statsmodels.api as sm
from statsmodels.stats.weightstats import DescrStatsW

# Confidence intervals via DescrStatsW
np.random.seed(42)
sample = np.random.normal(100, 15, 50)

d = DescrStatsW(sample)

print("statsmodels DescrStatsW ์‚ฌ์šฉ:")
print(f"ํ‘œ๋ณธํ‰๊ท : {d.mean:.2f}")
print(f"95% CI: {d.tconfint_mean(alpha=0.05)}")
print(f"99% CI: {d.tconfint_mean(alpha=0.01)}")

# Comparing two samples
group1 = np.random.normal(100, 15, 40)
group2 = np.random.normal(105, 15, 45)

from statsmodels.stats.weightstats import CompareMeans

cm = CompareMeans(DescrStatsW(group1), DescrStatsW(group2))
print(f"\n๋‘ ํ‰๊ท  ์ฐจ์ด์˜ 95% CI: {cm.tconfint_diff(alpha=0.05)}")

์—ฐ์Šต ๋ฌธ์ œ

๋ฌธ์ œ 1: t-์‹ ๋ขฐ๊ตฌ๊ฐ„

๋‹ค์Œ ํ‘œ๋ณธ์—์„œ ๋ชจํ‰๊ท ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„์„ ๊ตฌํ•˜์‹œ์˜ค.

sample = [23, 25, 28, 22, 26, 24, 27, 25, 29, 24]

๋ฌธ์ œ 2: ๋น„์œจ์˜ ์‹ ๋ขฐ๊ตฌ๊ฐ„

500๋ช…์„ ๋Œ€์ƒ์œผ๋กœ ์„ค๋ฌธ์กฐ์‚ฌํ•œ ๊ฒฐ๊ณผ 230๋ช…์ด ์ฐฌ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค. - (a) ๋ชจ๋น„์œจ์˜ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„์„ ๊ตฌํ•˜์‹œ์˜ค (์ •๊ทœ๊ทผ์‚ฌ) - (b) Wilson ๋ฐฉ๋ฒ•์œผ๋กœ ๋‹ค์‹œ ๊ณ„์‚ฐํ•˜์‹œ์˜ค - (c) ํ‘œ๋ณธ ํฌ๊ธฐ๋ฅผ 2000๋ช…์œผ๋กœ ๋Š˜๋ฆฌ๋ฉด ๊ตฌ๊ฐ„ ํญ์ด ์–ด๋–ป๊ฒŒ ๋ณ€ํ•˜๋Š”๊ฐ€?

๋ฌธ์ œ 3: ๋ถ€ํŠธ์ŠคํŠธ๋žฉ

๋‹ค์Œ ๋น„๋Œ€์นญ ๋ถ„ํฌ์—์„œ ํ‰๊ท ๊ณผ ์ค‘์•™๊ฐ’์˜ ๋ถ€ํŠธ์ŠคํŠธ๋žฉ 95% ์‹ ๋ขฐ๊ตฌ๊ฐ„์„ ๊ฐ๊ฐ ๊ตฌํ•˜์‹œ์˜ค.

np.random.seed(42)
data = np.random.exponential(10, 40)

์ •๋ฆฌ

| ์‹ ๋ขฐ๊ตฌ๊ฐ„ ์œ ํ˜• | ์กฐ๊ฑด | ๊ณต์‹ | Python |
|---|---|---|---|
| ํ‰๊ท  (ฯƒ ๊ธฐ์ง€) | Z-๊ตฌ๊ฐ„ | xฬ„ ยฑ zยทฯƒ/โˆšn | stats.norm.interval() |
| ํ‰๊ท  (ฯƒ ๋ฏธ์ง€) | t-๊ตฌ๊ฐ„ | xฬ„ ยฑ tยทs/โˆšn | stats.t.interval() |
| ๋น„์œจ | np โ‰ฅ 10 | pฬ‚ ยฑ zยทโˆš(pฬ‚(1-pฬ‚)/n) | proportion_confint() |
| ๋ถ„์‚ฐ | ์ •๊ทœ๋ชจ์ง‘๋‹จ | ฯ‡ยฒ ๋ถ„ํฌ ์ด์šฉ | ์ง์ ‘ ๊ณ„์‚ฐ |
| ๋ถ€ํŠธ์ŠคํŠธ๋žฉ | ๋น„๋ชจ์ˆ˜์  | ์žฌํ‘œ๋ณธ์ถ”์ถœ | stats.bootstrap() |
Use the "์ด์ „" (previous) and "๋‹ค์Œ" (next) links at the top to navigate between lessons.