03_model_evaluation.ipynb

Download
json 746 lines 25.5 KB
  1{
  2 "cells": [
  3  {
  4   "cell_type": "markdown",
  5   "metadata": {},
  6   "source": [
  7    "# 모델 평가 (Model Evaluation)\n",
  8    "\n",
  9    "이 노트북에서는 머신러닝 모델의 성능을 평가하는 다양한 지표와 방법을 학습합니다.\n",
 10    "\n",
 11    "## 목차\n",
 12    "1. 분류 평가 지표\n",
 13    "   - 혼동 행렬 (Confusion Matrix)\n",
 14    "   - 정확도, 정밀도, 재현율, F1-score\n",
 15    "   - ROC 곡선과 AUC\n",
 16    "   - Precision-Recall 곡선\n",
 17    "2. 다중 분류 평가\n",
 18    "3. 회귀 평가 지표\n",
 19    "4. 학습 곡선\n",
 20    "5. 평가 지표 선택 가이드"
 20   ]
 21  },
 22  {
 23   "cell_type": "code",
 24   "execution_count": null,
 25   "metadata": {},
 26   "outputs": [],
 27   "source": [
 28    "# Import required libraries (standard library first, then third-party)\n",
 29    "import warnings\n",
 30    "\n",
 31    "import matplotlib.pyplot as plt\n",
 32    "import numpy as np\n",
 33    "import pandas as pd\n",
 34    "import seaborn as sns\n",
 35    "from matplotlib import font_manager\n",
 36    "from sklearn.datasets import load_breast_cancer, load_iris, load_diabetes\n",
 37    "from sklearn.linear_model import LogisticRegression, LinearRegression\n",
 38    "from sklearn.metrics import (\n",
 39    "    confusion_matrix, ConfusionMatrixDisplay,\n",
 40    "    accuracy_score, precision_score, recall_score, f1_score,\n",
 41    "    classification_report,\n",
 42    "    roc_curve, roc_auc_score, auc,\n",
 43    "    precision_recall_curve, average_precision_score,\n",
 44    "    mean_absolute_error, mean_squared_error, r2_score\n",
 45    ")\n",
 46    "from sklearn.model_selection import train_test_split, learning_curve\n",
 47    "from sklearn.preprocessing import label_binarize\n",
 44    "\n",
 45    "# ์‹œ๊ฐํ™” ์„ค์ •\n",
 46    "plt.rcParams['figure.figsize'] = (10, 6)\n",
 47    "plt.rcParams['font.family'] = 'AppleGothic'  # MacOS์šฉ ํ•œ๊ธ€ ํฐํŠธ\n",
 48    "plt.rcParams['axes.unicode_minus'] = False\n",
 49    "sns.set_style('whitegrid')\n",
 50    "\n",
 51    "# ๊ฒฝ๊ณ  ๋ฌด์‹œ\n",
 52    "import warnings\n",
 53    "warnings.filterwarnings('ignore')"
 54   ]
 55  },
 56  {
 57   "cell_type": "markdown",
 58   "metadata": {},
 59   "source": [
 60    "## 1. 분류 평가 지표\n",
 61    "\n",
 62    "### 1.1 혼동 행렬 (Confusion Matrix)"
 63   ]
 64  },
 65  {
 66   "cell_type": "code",
 67   "execution_count": null,
 68   "metadata": {},
 69   "outputs": [],
 70   "source": [
 71    "# ๊ฐ„๋‹จํ•œ ์˜ˆ์‹œ ๋ฐ์ดํ„ฐ\n",
 72    "y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1])\n",
 73    "y_pred = np.array([1, 0, 1, 0, 0, 1, 1, 0, 1, 1])\n",
 74    "\n",
 75    "# ํ˜ผ๋™ ํ–‰๋ ฌ ๊ณ„์‚ฐ\n",
 76    "cm = confusion_matrix(y_true, y_pred)\n",
 77    "print(\"ํ˜ผ๋™ ํ–‰๋ ฌ:\")\n",
 78    "print(cm)\n",
 79    "print()\n",
 80    "\n",
 81    "# ํ˜ผ๋™ ํ–‰๋ ฌ ์š”์†Œ ์ถ”์ถœ\n",
 82    "tn, fp, fn, tp = cm.ravel()\n",
 83    "print(f\"TN (True Negative): {tn}\")\n",
 84    "print(f\"FP (False Positive): {fp} - Type I Error (์œ„์–‘์„ฑ)\")\n",
 85    "print(f\"FN (False Negative): {fn} - Type II Error (์œ„์Œ์„ฑ)\")\n",
 86    "print(f\"TP (True Positive): {tp}\")"
 87   ]
 88  },
 89  {
 90   "cell_type": "code",
 91   "execution_count": null,
 92   "metadata": {},
 93   "outputs": [],
 94   "source": [
 95    "# Plot the toy confusion matrix (cm from the previous cell) as a heatmap\n",
 96    "fig, ax = plt.subplots(figsize=(8, 6))\n",
 97    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Negative', 'Positive'])\n",
 98    "disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
 99    "plt.title('Confusion Matrix', fontsize=14, pad=20)\n",
100    "plt.show()"
101   ]
102  },
103  {
104   "cell_type": "markdown",
105   "metadata": {},
106   "source": [
107    "### 1.2 정확도, 정밀도, 재현율, F1-Score"
108   ]
109  },
110  {
111   "cell_type": "code",
112   "execution_count": null,
113   "metadata": {},
114   "outputs": [],
115   "source": [
116    "# ๊ฐ ์ง€ํ‘œ ๊ณ„์‚ฐ\n",
117    "accuracy = accuracy_score(y_true, y_pred)\n",
118    "precision = precision_score(y_true, y_pred)\n",
119    "recall = recall_score(y_true, y_pred)\n",
120    "f1 = f1_score(y_true, y_pred)\n",
121    "\n",
122    "print(\"=== ๋ถ„๋ฅ˜ ํ‰๊ฐ€ ์ง€ํ‘œ ===\")\n",
123    "print(f\"์ •ํ™•๋„ (Accuracy): {accuracy:.4f}\")\n",
124    "print(f\"  - (TP + TN) / (TP + TN + FP + FN)\")\n",
125    "print(f\"  - ์ „์ฒด ์˜ˆ์ธก ์ค‘ ์ •๋‹ต ๋น„์œจ\\n\")\n",
126    "\n",
127    "print(f\"์ •๋ฐ€๋„ (Precision): {precision:.4f}\")\n",
128    "print(f\"  - TP / (TP + FP)\")\n",
129    "print(f\"  - ์–‘์„ฑ์œผ๋กœ ์˜ˆ์ธกํ•œ ๊ฒƒ ์ค‘ ์‹ค์ œ ์–‘์„ฑ์˜ ๋น„์œจ\\n\")\n",
130    "\n",
131    "print(f\"์žฌํ˜„์œจ (Recall/Sensitivity): {recall:.4f}\")\n",
132    "print(f\"  - TP / (TP + FN)\")\n",
133    "print(f\"  - ์‹ค์ œ ์–‘์„ฑ ์ค‘ ์–‘์„ฑ์œผ๋กœ ์˜ˆ์ธกํ•œ ๋น„์œจ\\n\")\n",
134    "\n",
135    "print(f\"F1-Score: {f1:.4f}\")\n",
136    "print(f\"  - 2 * (Precision * Recall) / (Precision + Recall)\")\n",
137    "print(f\"  - ์ •๋ฐ€๋„์™€ ์žฌํ˜„์œจ์˜ ์กฐํ™”ํ‰๊ท \")"
138   ]
139  },
140  {
141   "cell_type": "code",
142   "execution_count": null,
143   "metadata": {},
144   "outputs": [],
145   "source": [
146    "# ์ˆ˜๋™ ๊ณ„์‚ฐ์œผ๋กœ ๊ฒ€์ฆ\n",
147    "accuracy_manual = (tp + tn) / (tp + tn + fp + fn)\n",
148    "precision_manual = tp / (tp + fp) if (tp + fp) > 0 else 0\n",
149    "recall_manual = tp / (tp + fn) if (tp + fn) > 0 else 0\n",
150    "f1_manual = 2 * precision_manual * recall_manual / (precision_manual + recall_manual) if (precision_manual + recall_manual) > 0 else 0\n",
151    "\n",
152    "print(\"=== ์ˆ˜๋™ ๊ณ„์‚ฐ ๊ฒ€์ฆ ===\")\n",
153    "print(f\"Accuracy:  {accuracy_manual:.4f}\")\n",
154    "print(f\"Precision: {precision_manual:.4f}\")\n",
155    "print(f\"Recall:    {recall_manual:.4f}\")\n",
156    "print(f\"F1-Score:  {f1_manual:.4f}\")"
157   ]
158  },
159  {
160   "cell_type": "markdown",
161   "metadata": {},
162   "source": [
163    "### 1.3 실제 데이터셋으로 분류 평가 - Breast Cancer Dataset"
164   ]
165  },
166  {
167   "cell_type": "code",
168   "execution_count": null,
169   "metadata": {},
170   "outputs": [],
171   "source": [
172    "# ์œ ๋ฐฉ์•” ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ\n",
173    "cancer = load_breast_cancer()\n",
174    "X_train, X_test, y_train, y_test = train_test_split(\n",
175    "    cancer.data, cancer.target, test_size=0.2, random_state=42\n",
176    ")\n",
177    "\n",
178    "# ๋กœ์ง€์Šคํ‹ฑ ํšŒ๊ท€ ๋ชจ๋ธ ํ•™์Šต\n",
179    "model = LogisticRegression(max_iter=10000, random_state=42)\n",
180    "model.fit(X_train, y_train)\n",
181    "y_pred = model.predict(X_test)\n",
182    "\n",
183    "print(\"Breast Cancer Classification Results\")\n",
184    "print(\"=\"*50)\n",
185    "print(f\"Training samples: {len(X_train)}\")\n",
186    "print(f\"Test samples: {len(X_test)}\")\n",
187    "print(f\"Features: {cancer.feature_names[:5]}... (total {len(cancer.feature_names)})\")"
188   ]
189  },
190  {
191   "cell_type": "code",
192   "execution_count": null,
193   "metadata": {},
194   "outputs": [],
195   "source": [
196    "# Plot the confusion matrix for the breast cancer test split.\n",
197    "# NOTE: this rebinds cm / tn / fp / fn / tp, replacing the toy-example values.\n",
198    "cm = confusion_matrix(y_test, y_pred)\n",
199    "fig, ax = plt.subplots(figsize=(8, 6))\n",
200    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Malignant', 'Benign'])\n",
201    "disp.plot(ax=ax, cmap='RdYlGn', values_format='d')\n",
202    "plt.title('Confusion Matrix - Breast Cancer Classification', fontsize=14, pad=20)\n",
203    "plt.show()\n",
204    "\n",
205    "tn, fp, fn, tp = cm.ravel()\n",
206    "print(f\"\\nTrue Negatives: {tn}\")\n",
207    "print(f\"False Positives: {fp}\")\n",
208    "print(f\"False Negatives: {fn}\")\n",
209    "print(f\"True Positives: {tp}\")"
209   ]
210  },
211  {
212   "cell_type": "code",
213   "execution_count": null,
214   "metadata": {},
215   "outputs": [],
216   "source": [
217    "# ๋ถ„๋ฅ˜ ๋ฆฌํฌํŠธ\n",
218    "report = classification_report(y_test, y_pred, target_names=['Malignant', 'Benign'])\n",
219    "print(\"\\n=== Classification Report ===\")\n",
220    "print(report)\n",
221    "\n",
222    "# ๋”•์…”๋„ˆ๋ฆฌ ํ˜•ํƒœ๋กœ๋„ ํ™•์ธ\n",
223    "report_dict = classification_report(y_test, y_pred, target_names=['Malignant', 'Benign'], output_dict=True)\n",
224    "print(f\"\\nBenign ํด๋ž˜์Šค์˜ F1-score: {report_dict['Benign']['f1-score']:.4f}\")\n",
225    "print(f\"Malignant ํด๋ž˜์Šค์˜ Recall: {report_dict['Malignant']['recall']:.4f}\")"
226   ]
227  },
228  {
229   "cell_type": "markdown",
230   "metadata": {},
231   "source": [
232    "### 1.4 ROC 곡선과 AUC"
233   ]
234  },
235  {
236   "cell_type": "code",
237   "execution_count": null,
238   "metadata": {},
239   "outputs": [],
240   "source": [
241    "# ์˜ˆ์ธก ํ™•๋ฅ \n",
242    "y_proba = model.predict_proba(X_test)[:, 1]\n",
243    "\n",
244    "# ROC ๊ณก์„  ๊ณ„์‚ฐ\n",
245    "fpr, tpr, thresholds = roc_curve(y_test, y_proba)\n",
246    "roc_auc = auc(fpr, tpr)\n",
247    "\n",
248    "# ROC ๊ณก์„  ์‹œ๊ฐํ™”\n",
249    "plt.figure(figsize=(10, 6))\n",
250    "plt.plot(fpr, tpr, 'b-', linewidth=2, label=f'ROC Curve (AUC = {roc_auc:.4f})')\n",
251    "plt.plot([0, 1], [0, 1], 'r--', linewidth=2, label='Random Classifier (AUC = 0.5)')\n",
252    "plt.xlabel('False Positive Rate (1 - Specificity)', fontsize=12)\n",
253    "plt.ylabel('True Positive Rate (Sensitivity/Recall)', fontsize=12)\n",
254    "plt.title('ROC Curve - Breast Cancer Classification', fontsize=14, pad=20)\n",
255    "plt.legend(loc='lower right', fontsize=11)\n",
256    "plt.grid(True, alpha=0.3)\n",
257    "plt.show()\n",
258    "\n",
259    "print(f\"AUC Score: {roc_auc:.4f}\")\n",
260    "print(f\"AUC Score (sklearn ์ง์ ‘ ๊ณ„์‚ฐ): {roc_auc_score(y_test, y_proba):.4f}\")\n",
261    "print(\"\\nAUC ํ•ด์„:\")\n",
262    "print(\"  - 1.0: ์™„๋ฒฝํ•œ ๋ถ„๋ฅ˜๊ธฐ\")\n",
263    "print(\"  - 0.5: ๋žœ๋ค ๋ถ„๋ฅ˜๊ธฐ\")\n",
264    "print(\"  - 0.0: ์ตœ์•…์˜ ๋ถ„๋ฅ˜๊ธฐ\")"
265   ]
266  },
267  {
268   "cell_type": "markdown",
269   "metadata": {},
270   "source": [
271    "### 1.5 Precision-Recall 곡선"
272   ]
273  },
274  {
275   "cell_type": "code",
276   "execution_count": null,
277   "metadata": {},
278   "outputs": [],
279   "source": [
280    "# PR ๊ณก์„  ๊ณ„์‚ฐ\n",
281    "precision, recall, pr_thresholds = precision_recall_curve(y_test, y_proba)\n",
282    "ap = average_precision_score(y_test, y_proba)\n",
283    "\n",
284    "# PR ๊ณก์„  ์‹œ๊ฐํ™”\n",
285    "plt.figure(figsize=(10, 6))\n",
286    "plt.plot(recall, precision, 'b-', linewidth=2, label=f'PR Curve (AP = {ap:.4f})')\n",
287    "plt.xlabel('Recall', fontsize=12)\n",
288    "plt.ylabel('Precision', fontsize=12)\n",
289    "plt.title('Precision-Recall Curve', fontsize=14, pad=20)\n",
290    "plt.legend(loc='best', fontsize=11)\n",
291    "plt.grid(True, alpha=0.3)\n",
292    "plt.xlim([0.0, 1.0])\n",
293    "plt.ylim([0.0, 1.05])\n",
294    "plt.show()\n",
295    "\n",
296    "print(f\"Average Precision (AP): {ap:.4f}\")\n",
297    "print(\"\\nROC vs PR ๊ณก์„ :\")\n",
298    "print(\"  - ROC: ๋ถˆ๊ท ํ˜• ๋ฐ์ดํ„ฐ์—์„œ๋„ ์•ˆ์ •์ , ์ „๋ฐ˜์ ์ธ ์„ฑ๋Šฅ ํ‰๊ฐ€\")\n",
299    "print(\"  - PR: ๋ถˆ๊ท ํ˜• ๋ฐ์ดํ„ฐ์—์„œ ๋” ๋ฏผ๊ฐ, ์–‘์„ฑ ํด๋ž˜์Šค ์˜ˆ์ธก ์„ฑ๋Šฅ์— ์ง‘์ค‘\")"
300   ]
301  },
302  {
303   "cell_type": "code",
304   "execution_count": null,
305   "metadata": {},
306   "outputs": [],
307   "source": [
308    "# Show the ROC and PR curves side by side (reuses fpr/tpr and precision/recall from the cells above)\n",
309    "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
310    "\n",
311    "# ROC Curve\n",
312    "axes[0].plot(fpr, tpr, 'b-', linewidth=2, label=f'ROC (AUC = {roc_auc:.4f})')\n",
313    "axes[0].plot([0, 1], [0, 1], 'r--', linewidth=2)\n",
314    "axes[0].set_xlabel('False Positive Rate', fontsize=12)\n",
315    "axes[0].set_ylabel('True Positive Rate', fontsize=12)\n",
316    "axes[0].set_title('ROC Curve', fontsize=14)\n",
317    "axes[0].legend(loc='lower right', fontsize=11)\n",
318    "axes[0].grid(True, alpha=0.3)\n",
319    "\n",
320    "# PR Curve\n",
321    "axes[1].plot(recall, precision, 'g-', linewidth=2, label=f'PR (AP = {ap:.4f})')\n",
322    "axes[1].set_xlabel('Recall', fontsize=12)\n",
323    "axes[1].set_ylabel('Precision', fontsize=12)\n",
324    "axes[1].set_title('Precision-Recall Curve', fontsize=14)\n",
325    "axes[1].legend(loc='best', fontsize=11)\n",
326    "axes[1].grid(True, alpha=0.3)\n",
327    "\n",
328    "plt.tight_layout()\n",
329    "plt.show()"
330   ]
331  },
332  {
333   "cell_type": "markdown",
334   "metadata": {},
335   "source": [
336    "## 2. 다중 분류 평가"
337   ]
338  },
339  {
340   "cell_type": "code",
341   "execution_count": null,
342   "metadata": {},
343   "outputs": [],
344   "source": [
345    "# Iris ๋ฐ์ดํ„ฐ์…‹ ๋กœ๋“œ (3๊ฐœ ํด๋ž˜์Šค)\n",
346    "iris = load_iris()\n",
347    "X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(\n",
348    "    iris.data, iris.target, test_size=0.2, random_state=42\n",
349    ")\n",
350    "\n",
351    "# ๋ชจ๋ธ ํ•™์Šต\n",
352    "model_iris = LogisticRegression(max_iter=1000, random_state=42)\n",
353    "model_iris.fit(X_train_iris, y_train_iris)\n",
354    "y_pred_iris = model_iris.predict(X_test_iris)\n",
355    "\n",
356    "print(\"Iris Multi-class Classification\")\n",
357    "print(\"=\"*50)\n",
358    "print(f\"Classes: {iris.target_names}\")\n",
359    "print(f\"Features: {iris.feature_names}\")"
360   ]
361  },
362  {
363   "cell_type": "code",
364   "execution_count": null,
365   "metadata": {},
366   "outputs": [],
367   "source": [
368    "# Multi-class confusion matrix for the iris test split\n",
369    "cm_iris = confusion_matrix(y_test_iris, y_pred_iris)\n",
370    "fig, ax = plt.subplots(figsize=(10, 8))\n",
371    "disp = ConfusionMatrixDisplay(confusion_matrix=cm_iris, display_labels=iris.target_names)\n",
372    "disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
373    "plt.title('Multi-class Confusion Matrix - Iris Dataset', fontsize=14, pad=20)\n",
374    "plt.show()"
375   ]
376  },
377  {
378   "cell_type": "code",
379   "execution_count": null,
380   "metadata": {},
381   "outputs": [],
382   "source": [
383    "# ๋‹ค์ค‘ ๋ถ„๋ฅ˜ ์ง€ํ‘œ\n",
384    "print(\"=== Multi-class Classification Metrics ===\")\n",
385    "print(f\"์ •ํ™•๋„: {accuracy_score(y_test_iris, y_pred_iris):.4f}\\n\")\n",
386    "\n",
387    "# F1-score์˜ ๋‹ค์–‘ํ•œ ํ‰๊ท  ๋ฐฉ๋ฒ•\n",
388    "f1_macro = f1_score(y_test_iris, y_pred_iris, average='macro')\n",
389    "f1_weighted = f1_score(y_test_iris, y_pred_iris, average='weighted')\n",
390    "f1_micro = f1_score(y_test_iris, y_pred_iris, average='micro')\n",
391    "\n",
392    "print(f\"F1-Score (macro):    {f1_macro:.4f}  - ๊ฐ ํด๋ž˜์Šค์˜ F1์„ ๋‹จ์ˆœ ํ‰๊ท \")\n",
393    "print(f\"F1-Score (weighted): {f1_weighted:.4f}  - ๊ฐ ํด๋ž˜์Šค์˜ ์ƒ˜ํ”Œ ์ˆ˜๋กœ ๊ฐ€์ค‘ ํ‰๊ท \")\n",
394    "print(f\"F1-Score (micro):    {f1_micro:.4f}  - ์ „์ฒด TP, FP, FN์„ ํ•ฉ์‚ฐํ•˜์—ฌ ๊ณ„์‚ฐ\")"
395   ]
396  },
397  {
398   "cell_type": "code",
399   "execution_count": null,
400   "metadata": {},
401   "outputs": [],
402   "source": [
403    "# ๋ถ„๋ฅ˜ ๋ฆฌํฌํŠธ\n",
404    "report_iris = classification_report(y_test_iris, y_pred_iris, target_names=iris.target_names)\n",
405    "print(\"\\n=== Classification Report - Iris ===\")\n",
406    "print(report_iris)"
407   ]
408  },
409  {
410   "cell_type": "code",
411   "execution_count": null,
412   "metadata": {},
413   "outputs": [],
414   "source": [
415    "# ๋‹ค์ค‘ ํด๋ž˜์Šค ROC ๊ณก์„ \n",
416    "y_test_iris_bin = label_binarize(y_test_iris, classes=[0, 1, 2])\n",
417    "y_proba_iris = model_iris.predict_proba(X_test_iris)\n",
418    "\n",
419    "plt.figure(figsize=(10, 6))\n",
420    "colors = ['blue', 'red', 'green']\n",
421    "\n",
422    "for i, (color, name) in enumerate(zip(colors, iris.target_names)):\n",
423    "    fpr_i, tpr_i, _ = roc_curve(y_test_iris_bin[:, i], y_proba_iris[:, i])\n",
424    "    roc_auc_i = auc(fpr_i, tpr_i)\n",
425    "    plt.plot(fpr_i, tpr_i, color=color, linewidth=2,\n",
426    "             label=f'{name} (AUC = {roc_auc_i:.4f})')\n",
427    "\n",
428    "plt.plot([0, 1], [0, 1], 'k--', linewidth=2)\n",
429    "plt.xlabel('False Positive Rate', fontsize=12)\n",
430    "plt.ylabel('True Positive Rate', fontsize=12)\n",
431    "plt.title('Multi-class ROC Curves - Iris Dataset', fontsize=14, pad=20)\n",
432    "plt.legend(loc='lower right', fontsize=11)\n",
433    "plt.grid(True, alpha=0.3)\n",
434    "plt.show()"
435   ]
436  },
437  {
438   "cell_type": "markdown",
439   "metadata": {},
440   "source": [
441    "## 3. 회귀 평가 지표"
442   ]
443  },
444  {
445   "cell_type": "code",
446   "execution_count": null,
447   "metadata": {},
448   "outputs": [],
449   "source": [
450    "# ๊ฐ„๋‹จํ•œ ์˜ˆ์‹œ\n",
451    "y_true_reg = np.array([3.0, -0.5, 2.0, 7.0, 4.5])\n",
452    "y_pred_reg = np.array([2.5, 0.0, 2.0, 8.0, 4.0])\n",
453    "\n",
454    "# ํšŒ๊ท€ ์ง€ํ‘œ ๊ณ„์‚ฐ\n",
455    "mae = mean_absolute_error(y_true_reg, y_pred_reg)\n",
456    "mse = mean_squared_error(y_true_reg, y_pred_reg)\n",
457    "rmse = np.sqrt(mse)\n",
458    "r2 = r2_score(y_true_reg, y_pred_reg)\n",
459    "\n",
460    "print(\"=== ํšŒ๊ท€ ํ‰๊ฐ€ ์ง€ํ‘œ ===\")\n",
461    "print(f\"MAE (Mean Absolute Error): {mae:.4f}\")\n",
462    "print(f\"  - ํ‰๊ท ์ ์œผ๋กœ ์˜ˆ์ธก์ด ์‹ค์ œ๊ฐ’์—์„œ {mae:.4f} ๋งŒํผ ๋ฒ—์–ด๋‚จ\\n\")\n",
463    "\n",
464    "print(f\"MSE (Mean Squared Error): {mse:.4f}\")\n",
465    "print(f\"  - ํฐ ์˜ค์ฐจ์— ๋” ํฐ ํŒจ๋„ํ‹ฐ\\n\")\n",
466    "\n",
467    "print(f\"RMSE (Root Mean Squared Error): {rmse:.4f}\")\n",
468    "print(f\"  - ํƒ€๊ฒŸ๊ณผ ๊ฐ™์€ ๋‹จ์œ„๋กœ ํ•ด์„ ๊ฐ€๋Šฅ\\n\")\n",
469    "\n",
470    "print(f\"Rยฒ (Coefficient of Determination): {r2:.4f}\")\n",
471    "print(f\"  - 0~1, 1์— ๊ฐ€๊นŒ์šธ์ˆ˜๋ก ์ข‹์Œ\")\n",
472    "print(f\"  - ๋ชจ๋ธ์ด ๋ถ„์‚ฐ์˜ {r2*100:.1f}%๋ฅผ ์„ค๋ช…\")"
473   ]
474  },
475  {
476   "cell_type": "code",
477   "execution_count": null,
478   "metadata": {},
479   "outputs": [],
480   "source": [
481    "# ์ˆ˜๋™ ๊ณ„์‚ฐ์œผ๋กœ ๊ฒ€์ฆ\n",
482    "print(\"\\n=== ์ˆ˜๋™ ๊ณ„์‚ฐ ๊ฒ€์ฆ ===\")\n",
483    "mae_manual = np.mean(np.abs(y_true_reg - y_pred_reg))\n",
484    "mse_manual = np.mean((y_true_reg - y_pred_reg)**2)\n",
485    "rmse_manual = np.sqrt(mse_manual)\n",
486    "r2_manual = 1 - np.sum((y_true_reg - y_pred_reg)**2) / np.sum((y_true_reg - np.mean(y_true_reg))**2)\n",
487    "\n",
488    "print(f\"MAE:  {mae_manual:.4f}\")\n",
489    "print(f\"MSE:  {mse_manual:.4f}\")\n",
490    "print(f\"RMSE: {rmse_manual:.4f}\")\n",
491    "print(f\"Rยฒ:   {r2_manual:.4f}\")"
492   ]
493  },
494  {
495   "cell_type": "code",
496   "execution_count": null,
497   "metadata": {},
498   "outputs": [],
499   "source": [
500    "# ์‹ค์ œ ๋ฐ์ดํ„ฐ์…‹์œผ๋กœ ํšŒ๊ท€ ํ‰๊ฐ€ - Diabetes Dataset\n",
501    "diabetes = load_diabetes()\n",
502    "X_train_diab, X_test_diab, y_train_diab, y_test_diab = train_test_split(\n",
503    "    diabetes.data, diabetes.target, test_size=0.2, random_state=42\n",
504    ")\n",
505    "\n",
506    "# ์„ ํ˜• ํšŒ๊ท€ ๋ชจ๋ธ ํ•™์Šต\n",
507    "model_reg = LinearRegression()\n",
508    "model_reg.fit(X_train_diab, y_train_diab)\n",
509    "y_pred_diab = model_reg.predict(X_test_diab)\n",
510    "\n",
511    "# ํ‰๊ฐ€\n",
512    "mae_diab = mean_absolute_error(y_test_diab, y_pred_diab)\n",
513    "mse_diab = mean_squared_error(y_test_diab, y_pred_diab)\n",
514    "rmse_diab = np.sqrt(mse_diab)\n",
515    "r2_diab = r2_score(y_test_diab, y_pred_diab)\n",
516    "\n",
517    "print(\"Diabetes Regression Results\")\n",
518    "print(\"=\"*50)\n",
519    "print(f\"MAE:  {mae_diab:.4f}\")\n",
520    "print(f\"MSE:  {mse_diab:.4f}\")\n",
521    "print(f\"RMSE: {rmse_diab:.4f}\")\n",
522    "print(f\"Rยฒ:   {r2_diab:.4f}\")\n",
523    "print(f\"\\nํ•ด์„: ๋ชจ๋ธ์ด ํƒ€๊ฒŸ ๋ถ„์‚ฐ์˜ {r2_diab*100:.1f}%๋ฅผ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.\")"
524   ]
525  },
526  {
527   "cell_type": "code",
528   "execution_count": null,
529   "metadata": {},
530   "outputs": [],
531   "source": [
532    "# ์‹ค์ œ๊ฐ’ vs ์˜ˆ์ธก๊ฐ’ ์‹œ๊ฐํ™”\n",
533    "plt.figure(figsize=(10, 6))\n",
534    "plt.scatter(y_test_diab, y_pred_diab, alpha=0.6, edgecolors='k', s=80)\n",
535    "plt.plot([y_test_diab.min(), y_test_diab.max()], \n",
536    "         [y_test_diab.min(), y_test_diab.max()], \n",
537    "         'r--', linewidth=2, label='Perfect Prediction')\n",
538    "plt.xlabel('์‹ค์ œ๊ฐ’ (Actual)', fontsize=12)\n",
539    "plt.ylabel('์˜ˆ์ธก๊ฐ’ (Predicted)', fontsize=12)\n",
540    "plt.title(f'์‹ค์ œ๊ฐ’ vs ์˜ˆ์ธก๊ฐ’ (Rยฒ = {r2_diab:.4f})', fontsize=14, pad=20)\n",
541    "plt.legend(fontsize=11)\n",
542    "plt.grid(True, alpha=0.3)\n",
543    "plt.show()"
544   ]
545  },
546  {
547   "cell_type": "code",
548   "execution_count": null,
549   "metadata": {},
550   "outputs": [],
551   "source": [
552    "# ์ž”์ฐจ ๋ถ„์„\n",
553    "residuals = y_test_diab - y_pred_diab\n",
554    "\n",
555    "fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
556    "\n",
557    "# ์ž”์ฐจ ํ”Œ๋กฏ\n",
558    "axes[0].scatter(y_pred_diab, residuals, alpha=0.6, edgecolors='k', s=80)\n",
559    "axes[0].axhline(y=0, color='r', linestyle='--', linewidth=2)\n",
560    "axes[0].set_xlabel('์˜ˆ์ธก๊ฐ’ (Predicted)', fontsize=12)\n",
561    "axes[0].set_ylabel('์ž”์ฐจ (Residuals)', fontsize=12)\n",
562    "axes[0].set_title('Residual Plot', fontsize=14)\n",
563    "axes[0].grid(True, alpha=0.3)\n",
564    "\n",
565    "# ์ž”์ฐจ ๋ถ„ํฌ\n",
566    "axes[1].hist(residuals, bins=20, edgecolor='black', alpha=0.7)\n",
567    "axes[1].set_xlabel('์ž”์ฐจ (Residuals)', fontsize=12)\n",
568    "axes[1].set_ylabel('๋นˆ๋„ (Frequency)', fontsize=12)\n",
569    "axes[1].set_title('Residuals Distribution', fontsize=14)\n",
570    "axes[1].grid(True, alpha=0.3, axis='y')\n",
571    "\n",
572    "plt.tight_layout()\n",
573    "plt.show()\n",
574    "\n",
575    "print(f\"์ž”์ฐจ ํ‰๊ท : {residuals.mean():.4f} (0์— ๊ฐ€๊นŒ์›Œ์•ผ ํ•จ)\")\n",
576    "print(f\"์ž”์ฐจ ํ‘œ์ค€ํŽธ์ฐจ: {residuals.std():.4f}\")"
577   ]
578  },
579  {
580   "cell_type": "markdown",
581   "metadata": {},
582   "source": [
583    "## 4. 학습 곡선 (Learning Curve)"
584   ]
585  },
586  {
587   "cell_type": "code",
588   "execution_count": null,
589   "metadata": {},
590   "outputs": [],
591   "source": [
592    "# ํ•™์Šต ๊ณก์„  ๊ณ„์‚ฐ\n",
593    "train_sizes, train_scores, val_scores = learning_curve(\n",
594    "    LogisticRegression(max_iter=10000, random_state=42),\n",
595    "    cancer.data, cancer.target,\n",
596    "    train_sizes=np.linspace(0.1, 1.0, 10),\n",
597    "    cv=5,\n",
598    "    scoring='accuracy',\n",
599    "    n_jobs=-1\n",
600    ")\n",
601    "\n",
602    "# ํ‰๊ท  ๋ฐ ํ‘œ์ค€ํŽธ์ฐจ\n",
603    "train_mean = train_scores.mean(axis=1)\n",
604    "train_std = train_scores.std(axis=1)\n",
605    "val_mean = val_scores.mean(axis=1)\n",
606    "val_std = val_scores.std(axis=1)\n",
607    "\n",
608    "# ํ•™์Šต ๊ณก์„  ์‹œ๊ฐํ™”\n",
609    "plt.figure(figsize=(10, 6))\n",
610    "plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, \n",
611    "                 alpha=0.2, color='blue')\n",
612    "plt.fill_between(train_sizes, val_mean - val_std, val_mean + val_std, \n",
613    "                 alpha=0.2, color='orange')\n",
614    "plt.plot(train_sizes, train_mean, 'o-', color='blue', linewidth=2, \n",
615    "         label='Training Score')\n",
616    "plt.plot(train_sizes, val_mean, 'o-', color='orange', linewidth=2, \n",
617    "         label='Validation Score')\n",
618    "plt.xlabel('Training Set Size', fontsize=12)\n",
619    "plt.ylabel('Accuracy', fontsize=12)\n",
620    "plt.title('Learning Curve - Breast Cancer Classification', fontsize=14, pad=20)\n",
621    "plt.legend(loc='best', fontsize=11)\n",
622    "plt.grid(True, alpha=0.3)\n",
623    "plt.show()\n",
624    "\n",
625    "print(\"ํ•™์Šต ๊ณก์„  ํ•ด์„:\")\n",
626    "print(\"  - ๋‘ ๊ณก์„ ์ด ๋ชจ๋‘ ๋‚ฎ์Œ โ†’ ๊ณผ์†Œ์ ํ•ฉ (๋” ๋ณต์žกํ•œ ๋ชจ๋ธ ํ•„์š”)\")\n",
627    "print(\"  - ํ›ˆ๋ จ ๊ณก์„  ๋†’๊ณ  ๊ฒ€์ฆ ๊ณก์„  ๋‚ฎ์Œ โ†’ ๊ณผ์ ํ•ฉ (์ •๊ทœํ™” ํ•„์š”)\")\n",
628    "print(\"  - ๋‘ ๊ณก์„ ์ด ์ˆ˜๋ ด โ†’ ์ ์ ˆํ•œ ์ ํ•ฉ\")"
629   ]
630  },
631  {
632   "cell_type": "markdown",
633   "metadata": {},
634   "source": [
635    "## 5. 평가 지표 선택 가이드"
636   ]
637  },
638  {
639   "cell_type": "code",
640   "execution_count": null,
641   "metadata": {},
642   "outputs": [],
643   "source": [
644    "# ์ข…ํ•ฉ ํ‰๊ฐ€ ํ•จ์ˆ˜\n",
645    "def evaluate_classification(y_true, y_pred, y_proba=None):\n",
646    "    \"\"\"๋ถ„๋ฅ˜ ๋ชจ๋ธ ์ข…ํ•ฉ ํ‰๊ฐ€\"\"\"\n",
647    "    print(\"=== ๋ถ„๋ฅ˜ ํ‰๊ฐ€ ๊ฒฐ๊ณผ ===\")\n",
648    "    print(f\"Accuracy:  {accuracy_score(y_true, y_pred):.4f}\")\n",
649    "    print(f\"Precision: {precision_score(y_true, y_pred, average='weighted'):.4f}\")\n",
650    "    print(f\"Recall:    {recall_score(y_true, y_pred, average='weighted'):.4f}\")\n",
651    "    print(f\"F1-Score:  {f1_score(y_true, y_pred, average='weighted'):.4f}\")\n",
652    "    if y_proba is not None and len(np.unique(y_true)) == 2:\n",
653    "        print(f\"ROC-AUC:   {roc_auc_score(y_true, y_proba):.4f}\")\n",
654    "\n",
655    "def evaluate_regression(y_true, y_pred):\n",
656    "    \"\"\"ํšŒ๊ท€ ๋ชจ๋ธ ์ข…ํ•ฉ ํ‰๊ฐ€\"\"\"\n",
657    "    print(\"=== ํšŒ๊ท€ ํ‰๊ฐ€ ๊ฒฐ๊ณผ ===\")\n",
658    "    print(f\"MAE:  {mean_absolute_error(y_true, y_pred):.4f}\")\n",
659    "    print(f\"MSE:  {mean_squared_error(y_true, y_pred):.4f}\")\n",
660    "    print(f\"RMSE: {np.sqrt(mean_squared_error(y_true, y_pred)):.4f}\")\n",
661    "    print(f\"Rยฒ:   {r2_score(y_true, y_pred):.4f}\")\n",
662    "\n",
663    "# ํ…Œ์ŠคํŠธ\n",
664    "print(\"Breast Cancer ๋ชจ๋ธ ํ‰๊ฐ€:\")\n",
665    "evaluate_classification(y_test, y_pred, y_proba)\n",
666    "\n",
667    "print(\"\\nDiabetes ํšŒ๊ท€ ๋ชจ๋ธ ํ‰๊ฐ€:\")\n",
668    "evaluate_regression(y_test_diab, y_pred_diab)"
669   ]
670  },
671  {
672   "cell_type": "code",
673   "execution_count": null,
674   "metadata": {},
675   "outputs": [],
676   "source": [
677    "# ํ‰๊ฐ€ ์ง€ํ‘œ ์š”์•ฝ ํ‘œ\n",
678    "import pandas as pd\n",
679    "\n",
680    "metrics_summary = pd.DataFrame({\n",
681    "    '์ง€ํ‘œ': ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'ROC-AUC', 'MAE', 'MSE', 'Rยฒ'],\n",
682    "    '๋ถ„๋ฅ˜/ํšŒ๊ท€': ['๋ถ„๋ฅ˜', '๋ถ„๋ฅ˜', '๋ถ„๋ฅ˜', '๋ถ„๋ฅ˜', '๋ถ„๋ฅ˜', 'ํšŒ๊ท€', 'ํšŒ๊ท€', 'ํšŒ๊ท€'],\n",
683    "    '๋ฒ”์œ„': ['0-1', '0-1', '0-1', '0-1', '0-1', '0-โˆž', '0-โˆž', '-โˆž-1'],\n",
684    "    '์„ค๋ช…': [\n",
685    "        '์ „์ฒด ์ •๋‹ต ๋น„์œจ',\n",
686    "        '์–‘์„ฑ ์˜ˆ์ธก ์ค‘ ์‹ค์ œ ์–‘์„ฑ',\n",
687    "        '์‹ค์ œ ์–‘์„ฑ ์ค‘ ์–‘์„ฑ ์˜ˆ์ธก',\n",
688    "        'Precision/Recall ์กฐํ™”ํ‰๊ท ',\n",
689    "        '๋ถ„๋ฅ˜๊ธฐ ์ „๋ฐ˜์  ์„ฑ๋Šฅ',\n",
690    "        'ํ‰๊ท  ์ ˆ๋Œ€ ์˜ค์ฐจ',\n",
691    "        'ํ‰๊ท  ์ œ๊ณฑ ์˜ค์ฐจ',\n",
692    "        '์„ค๋ช… ๋ถ„์‚ฐ ๋น„์œจ'\n",
693    "    ]\n",
694    "})\n",
695    "\n",
696    "print(\"\\n=== ํ‰๊ฐ€ ์ง€ํ‘œ ์š”์•ฝ ===\")\n",
697    "print(metrics_summary.to_string(index=False))"
698   ]
699  },
700  {
701   "cell_type": "markdown",
702   "metadata": {},
703   "source": [
704    "## 요약\n",
705    "\n",
706    "### 분류 문제 지표 선택\n",
707    "\n",
708    "1. **균형 데이터**: Accuracy, F1-score\n",
709    "2. **불균형 데이터**: Precision, Recall, F1-score, PR-AUC\n",
710    "   - 양성 클래스가 중요: Recall 중시 (암 진단, 사기 탐지)\n",
711    "   - 오탐이 비용: Precision 중시 (스팸 필터)\n",
712    "3. **확률 예측 품질**: ROC-AUC, PR-AUC\n",
713    "4. **다중 분류**: Macro/Weighted/Micro F1\n",
714    "\n",
715    "### 회귀 문제 지표 선택\n",
716    "\n",
717    "1. **기본**: MSE, RMSE, MAE\n",
718    "2. **이상치 민감도**: MAE (robust), MSE (sensitive)\n",
719    "3. **상대적 오차**: R²\n",
720    "4. **모델 비교**: R² (최대 1, 평균 예측보다 나쁘면 음수 가능)"
721   ]
722  }
723 ],
724 "metadata": {
725  "kernelspec": {
726   "display_name": "Python 3",
727   "language": "python",
728   "name": "python3"
729  },
730  "language_info": {
731   "codemirror_mode": {
732    "name": "ipython",
733    "version": 3
734   },
735   "file_extension": ".py",
736   "mimetype": "text/x-python",
737   "name": "python",
738   "nbconvert_exporter": "python",
739   "pygments_lexer": "ipython3",
740   "version": "3.8.0"
741  }
742 },
743 "nbformat": 4,
744 "nbformat_minor": 4
745}