Linear Regression

Overview

Linear regression is the most basic regression algorithm for predicting continuous values. It models a linear relationship between input variables and an output variable.


1. Simple Linear Regression

1.1 Concept

ํ•˜๋‚˜์˜ ๋…๋ฆฝ๋ณ€์ˆ˜(X)๋กœ ์ข…์†๋ณ€์ˆ˜(y)๋ฅผ ์˜ˆ์ธกํ•ฉ๋‹ˆ๋‹ค.

y = ฮฒโ‚€ + ฮฒโ‚x + ฮต

- ฮฒโ‚€: intercept
- ฮฒโ‚: slope
- ฮต: error term

1.2 Implementation

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# ๋ฐ์ดํ„ฐ ์ƒ์„ฑ
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)  # y = 4 + 3x + noise

# ๋ชจ๋ธ ํ•™์Šต
model = LinearRegression()
model.fit(X, y)

# ๊ณ„์ˆ˜ ํ™•์ธ
print(f"์ ˆํŽธ (ฮฒโ‚€): {model.intercept_[0]:.4f}")
print(f"๊ธฐ์šธ๊ธฐ (ฮฒโ‚): {model.coef_[0][0]:.4f}")

# Predict
X_new = np.array([[0], [2]])
y_pred = model.predict(X_new)
print(f"\nPredictions: X=0 โ†’ y={y_pred[0][0]:.2f}, X=2 โ†’ y={y_pred[1][0]:.2f}")

# ์‹œ๊ฐํ™”
plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.7, label='๋ฐ์ดํ„ฐ')
plt.plot(X_new, y_pred, 'r-', linewidth=2, label='ํšŒ๊ท€์„ ')
plt.xlabel('X')
plt.ylabel('y')
plt.title('๋‹จ์ˆœ ์„ ํ˜•ํšŒ๊ท€')
plt.legend()
plt.show()

1.3 Ordinary Least Squares (OLS)

# ์ตœ์†Œ์ž์Šน๋ฒ•: ์ž”์ฐจ ์ œ๊ณฑํ•ฉ(RSS)์„ ์ตœ์†Œํ™”
# RSS = ฮฃ(yแตข - ลทแตข)ยฒ

# ์ˆ˜ํ•™์  ํ•ด
X_b = np.c_[np.ones((100, 1)), X]  # bias ์ถ”๊ฐ€
theta_best = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y

print(f"์ˆ˜ํ•™์  ํ•ด:")
print(f"ฮธโ‚€ = {theta_best[0][0]:.4f}")
print(f"ฮธโ‚ = {theta_best[1][0]:.4f}")

2. Multiple Linear Regression

2.1 Concept

์—ฌ๋Ÿฌ ๊ฐœ์˜ ๋…๋ฆฝ๋ณ€์ˆ˜๋กœ ์ข…์†๋ณ€์ˆ˜๋ฅผ ์˜ˆ์ธกํ•ฉ๋‹ˆ๋‹ค.

y = ฮฒโ‚€ + ฮฒโ‚xโ‚ + ฮฒโ‚‚xโ‚‚ + ... + ฮฒโ‚™xโ‚™ + ฮต

2.2 Implementation

import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
print(f"Features: {diabetes.feature_names}")
print(f"Data shape: {X.shape}")

# ๋ฐ์ดํ„ฐ ๋ถ„ํ• 
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ์Šค์ผ€์ผ๋ง
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ๋ชจ๋ธ ํ•™์Šต
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = model.predict(X_test_scaled)

print(f"\nMSE: {mean_squared_error(y_test, y_pred):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
print(f"Rยฒ Score: {r2_score(y_test, y_pred):.4f}")

# ๊ณ„์ˆ˜ ํ™•์ธ
coefficients = pd.DataFrame({
    'feature': diabetes.feature_names,
    'coefficient': model.coef_
}).sort_values('coefficient', key=abs, ascending=False)
print(f"\nํšŒ๊ท€ ๊ณ„์ˆ˜:")
print(coefficients)
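
A single train/test split can be noisy; cross-validation gives a steadier estimate. A minimal sketch using cross_val_score, with scaling done inside each fold to avoid leakage (the 5-fold choice is illustrative):

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

# 5-fold cross-validation; the pipeline refits the scaler within each fold
pipe = make_pipeline(StandardScaler(), LinearRegression())
cv_scores = cross_val_score(pipe, X, y, cv=5, scoring='r2')
print(f"CV Rยฒ: {cv_scores.mean():.4f} ยฑ {cv_scores.std():.4f}")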

3. Gradient Descent

3.1 Batch Gradient Descent

# Cost function: J(ฮธ) = (1/2m) ฮฃ(h(xแตข) - yแตข)ยฒ
# Update rule: ฮธ = ฮธ - ฮฑยทโˆ‡J(ฮธ)

def batch_gradient_descent(X, y, learning_rate=0.01, n_iterations=1000):
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]  # add a bias (intercept) column
    theta = np.random.randn(2, 1)  # random initialization

    cost_history = []

    for iteration in range(n_iterations):
        gradients = (1/m) * X_b.T @ (X_b @ theta - y)
        theta = theta - learning_rate * gradients

        cost = (1/(2*m)) * np.sum((X_b @ theta - y)**2)
        cost_history.append(cost)

    return theta, cost_history

# Run
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

theta, cost_history = batch_gradient_descent(X, y, learning_rate=0.1, n_iterations=1000)

print(f"ฮธโ‚€ = {theta[0][0]:.4f}")
print(f"ฮธโ‚ = {theta[1][0]:.4f}")

# Visualize convergence of the cost function
plt.figure(figsize=(10, 4))
plt.plot(cost_history[:100])
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.title('Gradient Descent Convergence')
plt.show()
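
Convergence is sensitive to the learning rate: too small and progress is slow, too large and the iterates diverge (for this data, rates much above roughly 0.9 blow up). A quick sweep reusing batch_gradient_descent above:

# Compare final cost across learning rates (all convergent here; small rates converge slowly)
for lr in [0.001, 0.01, 0.1, 0.5]:
    np.random.seed(42)  # identical initialization for a fair comparison
    _, history = batch_gradient_descent(X, y, learning_rate=lr, n_iterations=1000)
    print(f"learning_rate={lr}: final cost = {history[-1]:.6f}")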

3.2 Stochastic Gradient Descent (SGD)

from sklearn.linear_model import SGDRegressor

# ๋ฐ์ดํ„ฐ ์ค€๋น„
X_train, X_test, y_train, y_test = train_test_split(X, y.ravel(), test_size=0.2)

# ์Šค์ผ€์ผ๋ง (SGD๋Š” ์Šค์ผ€์ผ๋ง ํ•„์ˆ˜)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SGD regression
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None,
                       eta0=0.01, random_state=42)
sgd_reg.fit(X_train_scaled, y_train)

print(f"SGD ์ ˆํŽธ: {sgd_reg.intercept_[0]:.4f}")
print(f"SGD ๊ณ„์ˆ˜: {sgd_reg.coef_[0]:.4f}")

3.3 Mini-Batch Gradient Descent

def mini_batch_gradient_descent(X, y, batch_size=20, learning_rate=0.01, n_epochs=50):
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]
    theta = np.random.randn(2, 1)

    for epoch in range(n_epochs):
        # reshuffle the data at the start of each epoch
        shuffled_indices = np.random.permutation(m)
        X_b_shuffled = X_b[shuffled_indices]
        y_shuffled = y[shuffled_indices]

        for i in range(0, m, batch_size):
            xi = X_b_shuffled[i:i+batch_size]
            yi = y_shuffled[i:i+batch_size]
            gradients = (1/len(yi)) * xi.T @ (xi @ theta - yi)
            theta = theta - learning_rate * gradients

    return theta

theta = mini_batch_gradient_descent(X, y)
print(f"๋ฏธ๋‹ˆ๋ฐฐ์น˜ GD ๊ฒฐ๊ณผ: ฮธโ‚€={theta[0][0]:.4f}, ฮธโ‚={theta[1][0]:.4f}")

4. Regularization

๊ณผ์ ํ•ฉ์„ ๋ฐฉ์ง€ํ•˜๊ธฐ ์œ„ํ•ด ๋ชจ๋ธ์˜ ๋ณต์žก๋„์— ํŒจ๋„ํ‹ฐ๋ฅผ ๋ถ€์—ฌํ•ฉ๋‹ˆ๋‹ค.

4.1 Ridge Regression (L2 Regularization)

from sklearn.linear_model import Ridge

# Cost function: J(ฮธ) = MSE + ฮฑ ยท ฮฃฮธแตขยฒ

# Re-split and scale the diabetes data (X_train_scaled was overwritten in section 3.2)
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Experiment with several alpha values
# (alpha=0 reduces to plain OLS; scikit-learn advises using LinearRegression instead)
alphas = [0.01, 0.1, 1, 10, 100]

for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_scaled, y_train)
    y_pred = ridge.predict(X_test_scaled)
    print(f"Alpha={alpha}: Rยฒ={r2_score(y_test, y_pred):.4f}, sum|coef|={sum(abs(ridge.coef_)):.4f}")

4.2 Lasso Regression (L1 Regularization)

from sklearn.linear_model import Lasso

# Cost function: J(ฮธ) = MSE + ฮฑ ยท ฮฃ|ฮธแตข|
# Key property: drives some coefficients exactly to 0 (feature selection)

lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)

# Count the non-zero coefficients
non_zero = np.sum(lasso.coef_ != 0)
print(f"Non-zero coefficients: {non_zero}/{len(lasso.coef_)}")

y_pred = lasso.predict(X_test_scaled)
print(f"Lasso Rยฒ: {r2_score(y_test, y_pred):.4f}")

4.3 Elastic Net

from sklearn.linear_model import ElasticNet

# A mix of the L1 and L2 penalties
# Cost function: J(ฮธ) = MSE + rยทฮฑยทฮฃ|ฮธแตข| + (1-r)ยทฮฑยทฮฃฮธแตขยฒ/2

elastic = ElasticNet(alpha=0.1, l1_ratio=0.5)  # l1_ratio = r
elastic.fit(X_train_scaled, y_train)

y_pred = elastic.predict(X_test_scaled)
print(f"Elastic Net Rยฒ: {r2_score(y_test, y_pred):.4f}")

4.4 Comparing Regularization Methods

from sklearn.datasets import make_regression

# ๋ฐ์ดํ„ฐ ์ƒ์„ฑ (ํŠน์„ฑ > ์ƒ˜ํ”Œ)
X, y = make_regression(n_samples=50, n_features=100, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# ๋ชจ๋ธ ๋น„๊ต
models = {
    'Linear': LinearRegression(),
    'Ridge': Ridge(alpha=1),
    'Lasso': Lasso(alpha=0.1),
    'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5)
}

print("์ •๊ทœํ™” ๋ฐฉ๋ฒ• ๋น„๊ต:")
for name, model in models.items():
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    non_zero = np.sum(model.coef_ != 0)  # all four models expose coef_ after fitting
    print(f"{name:12}: Train Rยฒ={train_score:.3f}, Test Rยฒ={test_score:.3f}, non-zero coefs={non_zero}")

5. Polynomial Regression

Nonlinear relationships can be modeled with linear regression by adding polynomial features.

from sklearn.preprocessing import PolynomialFeatures

# Generate nonlinear (quadratic) data
np.random.seed(42)
X = 6 * np.random.rand(100, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(100, 1)

# Create polynomial features
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)
print(f"์›๋ณธ ํŠน์„ฑ: {X.shape}")
print(f"๋‹คํ•ญ ํŠน์„ฑ: {X_poly.shape}")
print(f"ํŠน์„ฑ ์ด๋ฆ„: {poly.get_feature_names_out()}")

# Fit linear regression on the polynomial features
model = LinearRegression()
model.fit(X_poly, y)

print(f"\n๊ณ„์ˆ˜: {model.coef_}")
print(f"์ ˆํŽธ: {model.intercept_}")

# ์‹œ๊ฐํ™”
X_plot = np.linspace(-3, 3, 100).reshape(-1, 1)
X_plot_poly = poly.transform(X_plot)
y_plot = model.predict(X_plot_poly)

plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.7)
plt.plot(X_plot, y_plot, 'r-', linewidth=2)
plt.xlabel('X')
plt.ylabel('y')
plt.title('Polynomial Regression (degree=2)')
plt.show()
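
The degree controls the bias-variance trade-off: too low underfits, too high overfits. A quick comparison on the same data (training Rยฒ only, so the high-degree score is optimistic):

# Compare polynomial degrees; train Rยฒ keeps rising even as high degrees overfit
for degree in [1, 2, 10]:
    poly_d = PolynomialFeatures(degree=degree, include_bias=False)
    X_d = poly_d.fit_transform(X)
    lin = LinearRegression().fit(X_d, y)
    print(f"degree={degree}: train Rยฒ = {lin.score(X_d, y):.4f}")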

6. Regression Evaluation Metrics

from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    mean_absolute_percentage_error
)

# Example true and predicted values
y_true = np.array([3, -0.5, 2, 7])
y_pred = np.array([2.5, 0.0, 2, 8])

# MAE (Mean Absolute Error)
mae = mean_absolute_error(y_true, y_pred)
print(f"MAE: {mae:.4f}")

# MSE (Mean Squared Error)
mse = mean_squared_error(y_true, y_pred)
print(f"MSE: {mse:.4f}")

# RMSE (Root Mean Squared Error)
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")

# Rยฒ (coefficient of determination)
r2 = r2_score(y_true, y_pred)
print(f"Rยฒ: {r2:.4f}")

# MAPE (Mean Absolute Percentage Error)
mape = mean_absolute_percentage_error(y_true, y_pred)
print(f"MAPE: {mape:.4f}")

Exercises

Problem 1: Simple Linear Regression

๋‹ค์Œ ๋ฐ์ดํ„ฐ๋กœ ์„ ํ˜•ํšŒ๊ท€ ๋ชจ๋ธ์„ ํ•™์Šตํ•˜๊ณ  X=7์ผ ๋•Œ ์˜ˆ์ธก๊ฐ’์„ ๊ตฌํ•˜์„ธ์š”.

X = np.array([[1], [2], [3], [4], [5], [6]])
y = np.array([2, 4, 5, 4, 5, 7])

# Solution
model = LinearRegression()
model.fit(X, y)
prediction = model.predict([[7]])
print(f"Prediction at X=7: {prediction[0]:.2f}")
print(f"Rยฒ: {model.score(X, y):.4f}")

Problem 2: Ridge vs Lasso

Compare the performance of Ridge and Lasso on the diabetes dataset.

from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)

# Solution
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

for Model, name in [(Ridge, 'Ridge'), (Lasso, 'Lasso')]:
    model = Model(alpha=1)
    model.fit(X_train_s, y_train)
    print(f"{name} Rยฒ: {model.score(X_test_s, y_test):.4f}")

Summary

Method                | Characteristics                             | When to use
----------------------|---------------------------------------------|--------------------
Linear regression     | Basic model, easy to interpret              | Baseline model
Ridge (L2)            | Shrinks coefficients, prevents overfitting  | Multicollinearity
Lasso (L1)            | Feature selection, sparse model             | Many features
Elastic Net           | Mix of L1 and L2                            | Correlated features
Polynomial regression | Nonlinear relationships                     | Curved patterns