๋ชจ๋ธ ํ๊ฐ (Model Evaluation)
๋ชจ๋ธ ํ๊ฐ (Model Evaluation)¶
๊ฐ์¶
๋ชจ๋ธ ํ๊ฐ๋ ํ์ต๋ ๋ชจ๋ธ์ ์ฑ๋ฅ์ ๊ฐ๊ด์ ์ผ๋ก ์ธก์ ํ๋ ๊ณผ์ ์ ๋๋ค. ๋ถ๋ฅ์ ํ๊ท ๋ฌธ์ ์ ๋ฐ๋ผ ๋ค๋ฅธ ํ๊ฐ ์งํ๋ฅผ ์ฌ์ฉํฉ๋๋ค.
1. ๋ถ๋ฅ ํ๊ฐ ์งํ¶
1.1 ํผ๋ ํ๋ ฌ (Confusion Matrix)¶
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
# Example data: binary ground-truth labels vs model predictions
y_true = [1, 0, 1, 1, 0, 1, 0, 0, 1, 1]
y_pred = [1, 0, 1, 0, 0, 1, 1, 0, 1, 1]
# Confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_true, y_pred)
print("ํผ๋ ํ๋ ฌ:")
print(cm)
# Visualize the matrix as a heatmap
fig, ax = plt.subplots(figsize=(6, 5))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Negative', 'Positive'])
disp.plot(ax=ax, cmap='Blues')
plt.title('Confusion Matrix')
plt.show()
# Unpack the four cells; ravel() flattens row-major, giving TN, FP, FN, TP
tn, fp, fn, tp = cm.ravel()
print(f"\nTN (True Negative): {tn}")
print(f"FP (False Positive): {fp} - Type I Error")
print(f"FN (False Negative): {fn} - Type II Error")
print(f"TP (True Positive): {tp}")
1.2 ์ ํ๋ (Accuracy)¶
from sklearn.metrics import accuracy_score
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
accuracy = accuracy_score(y_true, y_pred)
print(f"์ ํ๋: {accuracy:.4f}")
# Manual computation from the cells unpacked earlier
accuracy_manual = (tp + tn) / (tp + tn + fp + fn)
print(f"์ ํ๋ (์๋): {accuracy_manual:.4f}")
# Caution: accuracy alone is misleading on imbalanced data
# e.g. with 99% negatives, predicting everything negative still scores 99%
1.3 ์ ๋ฐ๋, ์ฌํ์จ, F1-score¶
from sklearn.metrics import precision_score, recall_score, f1_score
# Precision = TP / (TP + FP)
# "Of the samples predicted positive, the fraction that are truly positive"
precision = precision_score(y_true, y_pred)
print(f"์ ๋ฐ๋ (Precision): {precision:.4f}")
# Recall (Sensitivity) = TP / (TP + FN)
# "Of the truly positive samples, the fraction predicted positive"
recall = recall_score(y_true, y_pred)
print(f"์ฌํ์จ (Recall): {recall:.4f}")
# F1-Score = 2 * (Precision * Recall) / (Precision + Recall)
# Harmonic mean of precision and recall
f1 = f1_score(y_true, y_pred)
print(f"F1-Score: {f1:.4f}")
# Manual computation for cross-checking
precision_manual = tp / (tp + fp)
recall_manual = tp / (tp + fn)
f1_manual = 2 * precision_manual * recall_manual / (precision_manual + recall_manual)
print(f"\n์๋ ๊ณ์ฐ:")
print(f"Precision: {precision_manual:.4f}")
print(f"Recall: {recall_manual:.4f}")
print(f"F1: {f1_manual:.4f}")
1.4 ๋ถ๋ฅ ๋ฆฌํฌํธ¶
from sklearn.metrics import classification_report
# Three-class example
y_true = [0, 0, 0, 1, 1, 1, 2, 2, 2]
y_pred = [0, 0, 1, 1, 1, 2, 2, 2, 0]
# Per-class precision/recall/F1 plus macro and weighted averages
report = classification_report(y_true, y_pred, target_names=['Class A', 'Class B', 'Class C'])
print("๋ถ๋ฅ ๋ฆฌํฌํธ:")
print(report)
# Return the report as a nested dictionary instead of a string
# (fixes a comment that was split across two lines; the orphaned second line
# had no leading '#' and was a SyntaxError)
report_dict = classification_report(y_true, y_pred, output_dict=True)
print(f"\nClass B์ F1-score: {report_dict['Class B']['f1-score']:.4f}")
1.5 ROC ๊ณก์ ๊ณผ AUC¶
from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Prepare the breast-cancer dataset (binary classification)
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
cancer.data, cancer.target, test_size=0.2, random_state=42
)
# Train a logistic-regression classifier
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
# Predicted probability of the positive class (column 1 of predict_proba)
y_proba = model.predict_proba(X_test)[:, 1]
# ROC curve: TPR vs FPR over all decision thresholds
fpr, tpr, thresholds = roc_curve(y_test, y_proba)
roc_auc = auc(fpr, tpr)
# Visualization; the diagonal is a random classifier (AUC = 0.5)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, 'b-', linewidth=2, label=f'ROC Curve (AUC = {roc_auc:.4f})')
plt.plot([0, 1], [0, 1], 'r--', linewidth=1, label='Random Classifier')
plt.xlabel('False Positive Rate (1 - Specificity)')
plt.ylabel('True Positive Rate (Sensitivity)')
plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.grid(True, alpha=0.3)
plt.show()
print(f"AUC Score: {roc_auc:.4f}")
print(f"AUC Score (sklearn): {roc_auc_score(y_test, y_proba):.4f}")
1.6 PR ๊ณก์ (Precision-Recall)¶
from sklearn.metrics import precision_recall_curve, average_precision_score
# PR curve: precision vs recall over all decision thresholds
precision, recall, thresholds = precision_recall_curve(y_test, y_proba)
ap = average_precision_score(y_test, y_proba)
# Visualization
plt.figure(figsize=(8, 6))
plt.plot(recall, precision, 'b-', linewidth=2, label=f'PR Curve (AP = {ap:.4f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
print(f"Average Precision: {ap:.4f}")
# ROC vs PR:
# - ROC: stable under imbalance, but FPR can look deceptively low when positives are rare
# - PR: more sensitive to class imbalance; focuses on positive-class performance
2. ๋ค์ค ๋ถ๋ฅ ํ๊ฐ¶
2.1 ๋ค์ค ๋ถ๋ฅ ์งํ¶
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
iris.data, iris.target, test_size=0.2, random_state=42
)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Accuracy
print(f"์ ํ๋: {accuracy_score(y_test, y_pred):.4f}")
# F1-score under different averaging strategies
print(f"\nF1-Score (macro): {f1_score(y_test, y_pred, average='macro'):.4f}")
print(f"F1-Score (weighted): {f1_score(y_test, y_pred, average='weighted'):.4f}")
print(f"F1-Score (micro): {f1_score(y_test, y_pred, average='micro'):.4f}")
# macro: unweighted mean of per-class F1
# weighted: per-class F1 weighted by class support (sample counts)
# micro: computed from the pooled TP, FP, FN totals
2.2 ๋ค์ค ํด๋์ค ROC¶
from sklearn.preprocessing import label_binarize
# One-vs-rest ROC: binarize labels so each class gets its own curve.
# (Loop-body indentation, lost in extraction, is restored here.)
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
y_proba = model.predict_proba(X_test)
plt.figure(figsize=(10, 6))
colors = ['blue', 'red', 'green']
for i, color in enumerate(colors):
    name = iris.target_names[i]
    # Treat class i as positive, everything else as negative
    fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_proba[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, color=color, linewidth=2,
             label=f'{name} (AUC = {roc_auc:.4f})')
# Chance diagonal for reference
plt.plot([0, 1], [0, 1], 'k--', linewidth=1)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Multi-class ROC Curves')
plt.legend(loc='lower right')
plt.grid(True, alpha=0.3)
plt.show()
3. ํ๊ท ํ๊ฐ ์งํ¶
import numpy as np
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    mean_absolute_percentage_error
)
# Toy regression targets and predictions
y_true = np.array([3, -0.5, 2, 7, 4.5])
y_pred = np.array([2.5, 0.0, 2, 8, 4.0])
# MAE (Mean Absolute Error): average absolute deviation, same units as the target
mae = mean_absolute_error(y_true, y_pred)
print(f"MAE: {mae:.4f}")
# MSE (Mean Squared Error): squares each error, so large errors are penalized more
mse = mean_squared_error(y_true, y_pred)
print(f"MSE: {mse:.4f}")
# RMSE: square root of MSE, interpretable in the target's units
rmse = np.sqrt(mse)
print(f"RMSE: {rmse:.4f}")
# R^2 (coefficient of determination): fraction of target variance explained;
# closer to 1 is better. (Fixes a comment that was split across two lines;
# the orphaned second line had no leading '#' and was a SyntaxError.)
r2 = r2_score(y_true, y_pred)
print(f"Rยฒ: {r2:.4f}")
# MAPE (Mean Absolute Percentage Error)
# Caution: unstable when y_true values are near zero
mape = mean_absolute_percentage_error(y_true, y_pred)
print(f"MAPE: {mape:.4f}")
# Manual computation of the same metrics for cross-checking
print("\n=== ์๋ ๊ณ์ฐ ===")
print(f"MAE: {np.mean(np.abs(y_true - y_pred)):.4f}")
print(f"MSE: {np.mean((y_true - y_pred)**2):.4f}")
print(f"Rยฒ: {1 - np.sum((y_true - y_pred)**2) / np.sum((y_true - np.mean(y_true))**2):.4f}")
3.1 Rยฒ Score ํด์¶
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
# Fit a plain linear regression on the diabetes dataset
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.2, random_state=42
)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"Rยฒ Score: {r2:.4f}")
# Rejoined the f-string literal below: it was broken across two source lines
# mid-word (a SyntaxError as extracted)
print(f"ํด์: ๋ชจ๋ธ์ด ํ๊ฒ ๋ถ์ฐ์ {r2*100:.1f}%๋ฅผ ์ค๋ชํฉ๋๋ค.")
# Scatter of actual vs predicted; the red dashed line is the perfect-prediction diagonal
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.7)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', linewidth=2)
plt.xlabel('์ค์ ๊ฐ')
plt.ylabel('์์ธก๊ฐ')
plt.title(f'์ค์ ๊ฐ vs ์์ธก๊ฐ (Rยฒ = {r2:.4f})')
plt.grid(True, alpha=0.3)
plt.show()
4. ํ๊ฐ ์งํ ์ ํ ๊ฐ์ด๋¶
"""
๋ถ๋ฅ ๋ฌธ์ :
1. ๊ท ํ ๋ฐ์ดํฐ
- Accuracy, F1-score
2. ๋ถ๊ท ํ ๋ฐ์ดํฐ
- Precision, Recall, F1-score, PR-AUC
- ์์ฑ ํด๋์ค๊ฐ ์ค์: Recall ์ค์
- ์คํ์ด ๋น์ฉ: Precision ์ค์
3. ํ๋ฅ ์์ธก ํ์ง
- ROC-AUC, PR-AUC, Log Loss
4. ๋ค์ค ๋ถ๋ฅ
- Macro F1: ํด๋์ค ๊ท ๋ฑ ์ค์
- Weighted F1: ์ํ ์ ๋น๋ก ์ค์
- Micro F1: ์ ์ฒด ์ ํ๋์ ์ ์ฌ
ํ๊ท ๋ฌธ์ :
1. ๊ธฐ๋ณธ
- MSE, RMSE, MAE
2. ์ด์์น ๋ฏผ๊ฐ
- MAE (robust), MSE (sensitive)
3. ์๋์ ์ค์ฐจ
- MAPE, Rยฒ
4. ๋ชจ๋ธ ๋น๊ต
- Rยฒ (0~1 ๋ฒ์๋ก ์ ๊ทํ๋จ)
"""
# Reusable evaluation helper
def evaluate_classification(y_true, y_pred, y_proba=None):
    """Print a summary of standard classification metrics.

    Restores the function-body indentation lost in extraction and replaces
    the docstring that was broken mid-word across two lines.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
        y_proba: optional positive-class probabilities; when given,
            ROC-AUC is printed as well.
    """
    print("=== ๋ถ๋ฅ ํ๊ฐ ๊ฒฐ๊ณผ ===")
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    # weighted averaging so the helper also works for multi-class labels
    print(f"Precision: {precision_score(y_true, y_pred, average='weighted'):.4f}")
    print(f"Recall: {recall_score(y_true, y_pred, average='weighted'):.4f}")
    print(f"F1-Score: {f1_score(y_true, y_pred, average='weighted'):.4f}")
    if y_proba is not None:
        print(f"ROC-AUC: {roc_auc_score(y_true, y_proba):.4f}")
def evaluate_regression(y_true, y_pred):
    """Print a summary of standard regression metrics (MAE, MSE, RMSE, R^2).

    Restores the function-body indentation lost in extraction, replaces the
    docstring that was broken mid-word across two lines, and computes MSE
    once instead of twice (it was recomputed for RMSE).
    """
    print("=== ํ๊ท ํ๊ฐ ๊ฒฐ๊ณผ ===")
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    mse = mean_squared_error(y_true, y_pred)
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {np.sqrt(mse):.4f}")
    print(f"Rยฒ: {r2_score(y_true, y_pred):.4f}")
5. ํ์ต ๊ณก์ ๊ณผ ๊ฒ์ฆ ๊ณก์ ¶
5.1 ํ์ต ๊ณก์ (Learning Curve)¶
from sklearn.model_selection import learning_curve
# Data preparation
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
# Learning curve: 5-fold CV scores at 10 training-set sizes from 10% to 100%
train_sizes, train_scores, val_scores = learning_curve(
LogisticRegression(max_iter=1000),
X, y,
train_sizes=np.linspace(0.1, 1.0, 10),
cv=5,
scoring='accuracy'
)
# Mean and standard deviation across the CV folds
train_mean = train_scores.mean(axis=1)
train_std = train_scores.std(axis=1)
val_mean = val_scores.mean(axis=1)
val_std = val_scores.std(axis=1)
# Visualization: shaded bands show +/- one standard deviation
plt.figure(figsize=(10, 6))
plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='blue')
plt.fill_between(train_sizes, val_mean - val_std, val_mean + val_std, alpha=0.1, color='orange')
plt.plot(train_sizes, train_mean, 'o-', color='blue', label='Training Score')
plt.plot(train_sizes, val_mean, 'o-', color='orange', label='Validation Score')
plt.xlabel('Training Set Size')
plt.ylabel('Accuracy')
plt.title('Learning Curve')
plt.legend(loc='best')
plt.grid(True, alpha=0.3)
plt.show()
# Interpretation:
# - both curves low -> underfitting
# - training curve high, validation curve low -> overfitting
# - curves converge -> good fit
5.2 ๊ฒ์ฆ ๊ณก์ (Validation Curve)¶
from sklearn.model_selection import validation_curve
# Hyperparameter range for C (inverse regularization strength), log-spaced
param_range = np.logspace(-4, 2, 10)
# Validation curve: 5-fold CV scores as C sweeps over the range
train_scores, val_scores = validation_curve(
LogisticRegression(max_iter=1000),
X, y,
param_name='C',
param_range=param_range,
cv=5,
scoring='accuracy'
)
# Mean and standard deviation across the CV folds
train_mean = train_scores.mean(axis=1)
train_std = train_scores.std(axis=1)
val_mean = val_scores.mean(axis=1)
val_std = val_scores.std(axis=1)
# Visualization on a log-scaled x axis
plt.figure(figsize=(10, 6))
plt.semilogx(param_range, train_mean, 'o-', color='blue', label='Training Score')
plt.semilogx(param_range, val_mean, 'o-', color='orange', label='Validation Score')
plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.1, color='blue')
plt.fill_between(param_range, val_mean - val_std, val_mean + val_std, alpha=0.1, color='orange')
plt.xlabel('C (Regularization Parameter)')
plt.ylabel('Accuracy')
plt.title('Validation Curve')
plt.legend(loc='best')
plt.grid(True, alpha=0.3)
plt.show()
์ฐ์ต ๋ฌธ์ ¶
๋ฌธ์ 1: ๋ถ๋ฅ ํ๊ฐ¶
ํผ๋ ํ๋ ฌ์์ Precision, Recall, F1-score๋ฅผ ๊ณ์ฐํ์ธ์.
# Given confusion-matrix cells: TN=50, FP=10, FN=5, TP=35
# Solution
tn, fp, fn, tp = 50, 10, 5, 35
# Precision: of everything predicted positive, the share that really is
precision = tp / (tp + fp)
# Recall: of everything actually positive, the share we caught
recall = tp / (tp + fn)
# F1: harmonic mean of precision and recall
f1 = 2 * precision * recall / (precision + recall)
for metric_name, value in [("Precision", precision), ("Recall", recall), ("F1-Score", f1)]:
    print(f"{metric_name}: {value:.4f}")
๋ฌธ์ 2: ํ๊ท ํ๊ฐ¶
์์ธก๊ฐ๊ณผ ์ค์ ๊ฐ์ผ๋ก Rยฒ๋ฅผ ๊ณ์ฐํ์ธ์.
# Actual vs predicted values for the exercise
y_true = [100, 150, 200, 250, 300]
y_pred = [110, 140, 210, 240, 290]
# Solution: let sklearn compute the coefficient of determination
from sklearn.metrics import r2_score
r2_value = r2_score(y_true, y_pred)
print(f"Rยฒ Score: {r2_value:.4f}")
์์ฝ¶
| ์งํ | ๋ถ๋ฅ/ํ๊ท | ๋ฒ์ | ์ค๋ช |
|---|---|---|---|
| Accuracy | ๋ถ๋ฅ | 0-1 | ์ ์ฒด ์ ๋ต ๋น์จ |
| Precision | ๋ถ๋ฅ | 0-1 | ์์ฑ ์์ธก ์ค ์ค์ ์์ฑ |
| Recall | ๋ถ๋ฅ | 0-1 | ์ค์ ์์ฑ ์ค ์์ฑ ์์ธก |
| F1-Score | ๋ถ๋ฅ | 0-1 | Precision/Recall ์กฐํํ๊ท |
| ROC-AUC | ๋ถ๋ฅ | 0-1 | ๋ถ๋ฅ๊ธฐ ์ ๋ฐ์ ์ฑ๋ฅ |
| MAE | ํ๊ท | 0-โ | ํ๊ท ์ ๋ ์ค์ฐจ |
| MSE | ํ๊ท | 0-โ | ํ๊ท ์ ๊ณฑ ์ค์ฐจ |
| Rยฒ | ํ๊ท | -โ-1 | ์ค๋ช ๋ถ์ฐ ๋น์จ |