선형회귀 (Linear Regression)
선형회귀 (Linear Regression)
개요
์ ํํ๊ท๋ ์ฐ์์ ์ธ ๊ฐ์ ์์ธกํ๋ ๊ฐ์ฅ ๊ธฐ๋ณธ์ ์ธ ํ๊ท ์๊ณ ๋ฆฌ์ฆ์ ๋๋ค. ์ ๋ ฅ ๋ณ์์ ์ถ๋ ฅ ๋ณ์ ๊ฐ์ ์ ํ ๊ด๊ณ๋ฅผ ๋ชจ๋ธ๋งํฉ๋๋ค.
1. ๋จ์ ์ ํํ๊ท¶
1.1 ๊ฐ๋ ¶
ํ๋์ ๋ ๋ฆฝ๋ณ์(X)๋ก ์ข ์๋ณ์(y)๋ฅผ ์์ธกํฉ๋๋ค.
y = β₀ + β₁x + ε
- β₀: 절편 (intercept)
- β₁: 기울기 (slope)
- ε: 오차항 (error term)
1.2 ๊ตฌํ¶
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Generate 100 synthetic points from y = 4 + 3x plus Gaussian noise
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1) # y = 4 + 3x + noise
# Fit an ordinary least-squares linear model
model = LinearRegression()
model.fit(X, y)
# Learned coefficients (expect intercept near 4, slope near 3)
print(f"์ ํธ (ฮฒโ): {model.intercept_[0]:.4f}")
print(f"๊ธฐ์ธ๊ธฐ (ฮฒโ): {model.coef_[0][0]:.4f}")
# Predict at the endpoints of the input range (X=0 and X=2)
X_new = np.array([[0], [2]])
y_pred = model.predict(X_new)
print(f"\n์์ธก๊ฐ: X=0 โ y={y_pred[0][0]:.2f}, X=2 โ y={y_pred[1][0]:.2f}")
# Plot the data together with the fitted regression line
plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.7, label='๋ฐ์ดํฐ')
plt.plot(X_new, y_pred, 'r-', linewidth=2, label='ํ๊ท์ ')
plt.xlabel('X')
plt.ylabel('y')
plt.title('๋จ์ ์ ํํ๊ท')
plt.legend()
plt.show()
1.3 ์ต์์์น๋ฒ (OLS)¶
# Ordinary least squares (OLS): minimize the residual sum of squares
# RSS = Σ(yᵢ - ŷᵢ)²
# Closed-form solution via the normal equations: (XᵀX)θ = Xᵀy
X_b = np.c_[np.ones((len(X), 1)), X]  # prepend bias column; len(X) instead of hard-coded 100
# Solving the linear system is numerically more stable (and cheaper)
# than explicitly forming inv(XᵀX) and multiplying.
theta_best = np.linalg.solve(X_b.T @ X_b, X_b.T @ y)
print(f"์ํ์ ํด:")
print(f"ฮธโ = {theta_best[0][0]:.4f}")
print(f"ฮธโ = {theta_best[1][0]:.4f}")
2. ๋ค์ค ์ ํํ๊ท¶
2.1 ๊ฐ๋ ¶
์ฌ๋ฌ ๊ฐ์ ๋ ๋ฆฝ๋ณ์๋ก ์ข ์๋ณ์๋ฅผ ์์ธกํฉ๋๋ค.
y = β₀ + β₁x₁ + β₂x₂ + ... + βₙxₙ + ε
2.2 ๊ตฌํ¶
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd  # was missing: pd.DataFrame below raised NameError
# Diabetes dataset: 10 numeric features, continuous disease-progression target
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
print(f"ํน์ฑ: {diabetes.feature_names}")
print(f"๋ฐ์ดํฐ ํํ: {X.shape}")
# Hold out 20% of the data for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Standardize features; fit the scaler on the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Fit the multiple linear regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)
# Evaluate on the held-out test set
y_pred = model.predict(X_test_scaled)
print(f"\nMSE: {mean_squared_error(y_test, y_pred):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")
print(f"Rยฒ Score: {r2_score(y_test, y_pred):.4f}")
# Coefficient table sorted by absolute magnitude (most influential first)
coefficients = pd.DataFrame({
    'feature': diabetes.feature_names,
    'coefficient': model.coef_
}).sort_values('coefficient', key=abs, ascending=False)
print(f"\nํ๊ท ๊ณ์:")
print(coefficients)
3. ๊ฒฝ์ฌํ๊ฐ๋ฒ (Gradient Descent)¶
3.1 ๋ฐฐ์น ๊ฒฝ์ฌํ๊ฐ๋ฒ¶
# Cost function: J(θ) = (1/2m) Σ(h(xᵢ) - yᵢ)²
# Update rule: θ = θ - α * ∇J(θ)
def batch_gradient_descent(X, y, learning_rate=0.01, n_iterations=1000):
    """Fit linear-regression parameters with full-batch gradient descent.

    Args:
        X: (m, d) feature matrix, without a bias column.
        y: (m, 1) target column vector.
        learning_rate: step size alpha.
        n_iterations: number of gradient steps.

    Returns:
        (theta, cost_history): theta is the (d+1, 1) parameter vector
        (bias term first); cost_history holds J(theta) per iteration.
    """
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]  # prepend bias column
    # Random initialization sized from the data: X_b.shape[1] instead of a
    # hard-coded 2, so any number of features works, not just one.
    theta = np.random.randn(X_b.shape[1], 1)
    cost_history = []
    for _ in range(n_iterations):
        # Gradient of the 1/(2m)-scaled squared-error cost
        gradients = (1 / m) * X_b.T @ (X_b @ theta - y)
        theta = theta - learning_rate * gradients
        cost = (1 / (2 * m)) * np.sum((X_b @ theta - y) ** 2)
        cost_history.append(cost)
    return theta, cost_history
# Run batch gradient descent on the synthetic y = 4 + 3x data
np.random.seed(42)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)
theta, cost_history = batch_gradient_descent(X, y, learning_rate=0.1, n_iterations=1000)
print(f"ฮธโ = {theta[0][0]:.4f}")
print(f"ฮธโ = {theta[1][0]:.4f}")
# Plot how the cost decreases over the first 100 iterations
plt.figure(figsize=(10, 4))
plt.plot(cost_history[:100])
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.title('๊ฒฝ์ฌํ๊ฐ๋ฒ ์๋ ด')
plt.show()
3.2 ํ๋ฅ ์ ๊ฒฝ์ฌํ๊ฐ๋ฒ (SGD)¶
from sklearn.linear_model import SGDRegressor
# Train/test split; random_state added so the split is reproducible,
# consistent with the other splits in this file (it was missing here).
X_train, X_test, y_train, y_test = train_test_split(
    X, y.ravel(), test_size=0.2, random_state=42
)
# SGD is sensitive to feature scale, so standardize first
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Stochastic gradient descent regressor, no regularization penalty
sgd_reg = SGDRegressor(max_iter=1000, tol=1e-3, penalty=None,
                       eta0=0.01, random_state=42)
sgd_reg.fit(X_train_scaled, y_train)
print(f"SGD ์ ํธ: {sgd_reg.intercept_[0]:.4f}")
print(f"SGD ๊ณ์: {sgd_reg.coef_[0]:.4f}")
3.3 ๋ฏธ๋๋ฐฐ์น ๊ฒฝ์ฌํ๊ฐ๋ฒ¶
def mini_batch_gradient_descent(X, y, batch_size=20, learning_rate=0.01, n_epochs=50):
    """Fit linear-regression parameters with mini-batch gradient descent.

    Args:
        X: (m, d) feature matrix, without a bias column.
        y: (m, 1) target column vector.
        batch_size: number of samples per gradient update.
        learning_rate: step size alpha.
        n_epochs: number of full passes over the shuffled data.

    Returns:
        theta: (d+1, 1) parameter vector (bias term first).
    """
    m = len(y)
    X_b = np.c_[np.ones((m, 1)), X]
    # Parameter vector sized from the data: X_b.shape[1] instead of a
    # hard-coded 2, so inputs with more than one feature work too.
    theta = np.random.randn(X_b.shape[1], 1)
    for _ in range(n_epochs):
        # Reshuffle each epoch so batch composition varies between epochs
        order = np.random.permutation(m)
        X_shuffled = X_b[order]
        y_shuffled = y[order]
        for start in range(0, m, batch_size):
            xb = X_shuffled[start:start + batch_size]
            yb = y_shuffled[start:start + batch_size]
            # Gradient on this mini-batch only (note len(yb): the last
            # batch may be smaller than batch_size)
            gradients = (1 / len(yb)) * xb.T @ (xb @ theta - yb)
            theta = theta - learning_rate * gradients
    return theta
# Run mini-batch GD on the same synthetic data and report the fitted parameters
theta = mini_batch_gradient_descent(X, y)
print(f"๋ฏธ๋๋ฐฐ์น GD ๊ฒฐ๊ณผ: ฮธโ={theta[0][0]:.4f}, ฮธโ={theta[1][0]:.4f}")
4. ์ ๊ทํ (Regularization)¶
๊ณผ์ ํฉ์ ๋ฐฉ์งํ๊ธฐ ์ํด ๋ชจ๋ธ์ ๋ณต์ก๋์ ํจ๋ํฐ๋ฅผ ๋ถ์ฌํฉ๋๋ค.
4.1 Ridge ํ๊ท (L2 ์ ๊ทํ)¶
from sklearn.linear_model import Ridge
# Ridge cost: J(θ) = MSE + α * Σθᵢ²
# Compare several regularization strengths. The stray plt.figure(...)
# call was removed: it opened an empty figure that nothing ever drew to.
alphas = [0, 0.1, 1, 10, 100]
for alpha in alphas:
    # Larger alpha shrinks the coefficients harder (alpha=0 is plain OLS)
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_scaled, y_train)
    y_pred = ridge.predict(X_test_scaled)
    print(f"Alpha={alpha}: Rยฒ={r2_score(y_test, y_pred):.4f}, ๊ณ์ํฉ={sum(abs(ridge.coef_)):.4f}")
4.2 Lasso ํ๊ท (L1 ์ ๊ทํ)¶
from sklearn.linear_model import Lasso
# ๋น์ฉ ํจ์: J(ฮธ) = MSE + ฮฑ * ฮฃ|ฮธแตข|
# ํน์ง: ์ผ๋ถ ๊ณ์๋ฅผ 0์ผ๋ก ๋ง๋ฆ (ํน์ฑ ์ ํ)
lasso = Lasso(alpha=0.1)
lasso.fit(X_train_scaled, y_train)
# 0์ด ์๋ ๊ณ์ ํ์ธ
non_zero = np.sum(lasso.coef_ != 0)
print(f"0์ด ์๋ ๊ณ์ ์: {non_zero}/{len(lasso.coef_)}")
y_pred = lasso.predict(X_test_scaled)
print(f"Lasso Rยฒ: {r2_score(y_test, y_pred):.4f}")
4.3 Elastic Net¶
from sklearn.linear_model import ElasticNet
# Elastic Net blends the L1 and L2 penalties:
# J(θ) = MSE + r*α*Σ|θᵢ| + (1-r)*α*Σθᵢ²/2, with r given by l1_ratio
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train_scaled, y_train)
y_pred = elastic.predict(X_test_scaled)
print(f"Elastic Net Rยฒ: {r2_score(y_test, y_pred):.4f}")
4.4 ์ ๊ทํ ๋น๊ต¶
from sklearn.datasets import make_regression
# High-dimensional setting: more features (100) than samples (50),
# where unregularized least squares overfits badly
X, y = make_regression(n_samples=50, n_features=100, noise=10, random_state=42)
# random_state added so the comparison is reproducible (was missing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# One model per regularization scheme, all with comparable settings
models = {
    'Linear': LinearRegression(),
    'Ridge': Ridge(alpha=1),
    'Lasso': Lasso(alpha=0.1),
    'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5)
}
print("์ ๊ทํ ๋ฐฉ๋ฒ ๋น๊ต:")
for name, model in models.items():
    model.fit(X_train, y_train)
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    # Count coefficients the penalty did not zero out. The original
    # fallback `else len(model.coef_)` read coef_ exactly when hasattr
    # said it was absent, so it could never work; report 0 instead.
    non_zero = np.count_nonzero(model.coef_) if hasattr(model, 'coef_') else 0
    print(f"{name:12}: Train Rยฒ={train_score:.3f}, Test Rยฒ={test_score:.3f}, ๋น์ ๊ณ์={non_zero}")
5. ๋คํญ ํ๊ท¶
๋น์ ํ ๊ด๊ณ๋ฅผ ์ ํํ๊ท๋ก ๋ชจ๋ธ๋งํฉ๋๋ค.
from sklearn.preprocessing import PolynomialFeatures
# Generate nonlinear (quadratic) data: y = 0.5x² + x + 2 + noise
np.random.seed(42)
X = 6 * np.random.rand(100, 1) - 3
y = 0.5 * X**2 + X + 2 + np.random.randn(100, 1)
# Expand features to [x, x²] so a linear model can fit the curve
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)
print(f"์๋ณธ ํน์ฑ: {X.shape}")
print(f"๋คํญ ํน์ฑ: {X_poly.shape}")
print(f"ํน์ฑ ์ด๋ฆ: {poly.get_feature_names_out()}")
# Plain linear regression on the expanded feature set
model = LinearRegression()
model.fit(X_poly, y)
print(f"\n๊ณ์: {model.coef_}")
print(f"์ ํธ: {model.intercept_}")
# Plot the fitted curve over a dense, evenly spaced grid
X_plot = np.linspace(-3, 3, 100).reshape(-1, 1)
X_plot_poly = poly.transform(X_plot)  # reuse the already-fitted transformer
y_plot = model.predict(X_plot_poly)
plt.figure(figsize=(10, 6))
plt.scatter(X, y, alpha=0.7)
plt.plot(X_plot, y_plot, 'r-', linewidth=2)
plt.xlabel('X')
plt.ylabel('y')
plt.title('๋คํญ ํ๊ท (degree=2)')
plt.show()
6. ํ๊ท ํ๊ฐ ์งํ¶
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    mean_absolute_percentage_error
)
# Tiny hand-made example to illustrate the common regression metrics
y_true = np.array([3, -0.5, 2, 7])
y_pred = np.array([2.5, 0.0, 2, 8])
# Compute every metric up front...
mae = mean_absolute_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
# ...then report them all with four decimal places
for label, value in [("MAE", mae), ("MSE", mse), ("RMSE", rmse),
                     ("Rยฒ", r2), ("MAPE", mape)]:
    print(f"{label}: {value:.4f}")
์ฐ์ต ๋ฌธ์ ¶
๋ฌธ์ 1: ๋จ์ ์ ํํ๊ท¶
๋ค์ ๋ฐ์ดํฐ๋ก ์ ํํ๊ท ๋ชจ๋ธ์ ํ์ตํ๊ณ X=7์ผ ๋ ์์ธก๊ฐ์ ๊ตฌํ์ธ์.
X = np.array([[1], [2], [3], [4], [5], [6]])
y = np.array([2, 4, 5, 4, 5, 7])
# Solution: fit OLS on the six points and extrapolate one step past the data
model = LinearRegression().fit(X, y)
prediction = model.predict(np.array([[7]]))
print(f"X=7์ผ ๋ ์์ธก๊ฐ: {prediction[0]:.2f}")
print(f"Rยฒ: {model.score(X, y):.4f}")
๋ฌธ์ 2: Ridge vs Lasso¶
๋น๋จ๋ณ ๋ฐ์ดํฐ์์ Ridge์ Lasso์ ์ฑ๋ฅ์ ๋น๊ตํ์ธ์.
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)
# Solution: standardize the features, then fit each regularized model
# with the same strength (alpha=1) and compare test-set R²
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)
for name, estimator_cls in [('Ridge', Ridge), ('Lasso', Lasso)]:
    model = estimator_cls(alpha=1)
    model.fit(X_train_s, y_train)
    print(f"{name} Rยฒ: {model.score(X_test_s, y_test):.4f}")
์์ฝ¶
| ๋ฐฉ๋ฒ | ํน์ง | ์ฌ์ฉ ์์ |
|---|---|---|
| ์ ํํ๊ท | ๊ธฐ๋ณธ, ํด์ ์ฉ์ด | ๊ธฐ์ค ๋ชจ๋ธ |
| Ridge (L2) | ๊ณ์ ์ถ์, ๊ณผ์ ํฉ ๋ฐฉ์ง | ๋ค์ค๊ณต์ ์ฑ |
| Lasso (L1) | ํน์ฑ ์ ํ, ํฌ์ ๋ชจ๋ธ | ๋ง์ ํน์ฑ |
| Elastic Net | L1+L2 ํผํฉ | ์๊ด๋ ํน์ฑ |
| ๋คํญ ํ๊ท | ๋น์ ํ ๊ด๊ณ | ๊ณก์ ํจํด |