1{
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
    "# 02. 로지스틱 회귀 (Logistic Regression)\n",
    "\n",
    "## 학습 목표\n",
    "- 로지스틱 회귀의 원리 이해 (이진 분류)\n",
    "- 시그모이드 함수와 확률 해석\n",
    "- 분류 평가 지표 (정확도, 정밀도, 재현율, F1)"
13 ]
14 },
15 {
16 "cell_type": "code",
17 "execution_count": null,
18 "metadata": {},
19 "outputs": [],
20 "source": [
21 "import numpy as np\n",
22 "import pandas as pd\n",
23 "import matplotlib.pyplot as plt\n",
24 "from sklearn.linear_model import LogisticRegression\n",
25 "from sklearn.model_selection import train_test_split\n",
26 "from sklearn.metrics import (accuracy_score, precision_score, recall_score, \n",
27 " f1_score, confusion_matrix, classification_report,\n",
28 " roc_curve, roc_auc_score)\n",
29 "from sklearn.datasets import make_classification, load_iris\n",
30 "import seaborn as sns\n",
31 "\n",
32 "plt.rcParams['font.family'] = 'DejaVu Sans'"
33 ]
34 },
35 {
36 "cell_type": "markdown",
37 "metadata": {},
38 "source": [
    "## 1. 시그모이드 함수 이해"
40 ]
41 },
42 {
43 "cell_type": "code",
44 "execution_count": null,
45 "metadata": {},
46 "outputs": [],
47 "source": [
48 "def sigmoid(z):\n",
49 " return 1 / (1 + np.exp(-z))\n",
50 "\n",
51 "# μκ·Έλͺ¨μ΄λ ν¨μ μκ°ν\n",
52 "z = np.linspace(-10, 10, 100)\n",
53 "plt.figure(figsize=(10, 5))\n",
54 "plt.plot(z, sigmoid(z), linewidth=2)\n",
55 "plt.axhline(y=0.5, color='r', linestyle='--', alpha=0.5)\n",
56 "plt.axvline(x=0, color='gray', linestyle='--', alpha=0.5)\n",
57 "plt.xlabel('z')\n",
58 "plt.ylabel('Ο(z)')\n",
59 "plt.title('Sigmoid Function: Ο(z) = 1 / (1 + e^(-z))')\n",
60 "plt.grid(True, alpha=0.3)\n",
61 "plt.show()"
62 ]
63 },
64 {
65 "cell_type": "markdown",
66 "metadata": {},
67 "source": [
    "## 2. 이진 분류 데이터 생성"
69 ]
70 },
71 {
72 "cell_type": "code",
73 "execution_count": null,
74 "metadata": {},
75 "outputs": [],
76 "source": [
    "# Generate a synthetic 2-feature binary classification dataset\n",
    "X, y = make_classification(\n",
    "    n_samples=500,\n",
    "    n_features=2,\n",
    "    n_redundant=0,\n",
    "    n_informative=2,\n",
    "    n_clusters_per_class=1,\n",
    "    random_state=42  # fixed seed for reproducibility\n",
    ")\n",
    "\n",
    "# Quick sanity checks on shape, labels, and class balance\n",
    "print(f\"X shape: {X.shape}\")\n",
    "print(f\"y classes: {np.unique(y)}\")\n",
    "print(f\"Class distribution: {np.bincount(y)}\")"
90 ]
91 },
92 {
93 "cell_type": "code",
94 "execution_count": null,
95 "metadata": {},
96 "outputs": [],
97 "source": [
    "# Scatter plot of the two classes in feature space\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.scatter(X[y==0, 0], X[y==0, 1], c='blue', label='Class 0', alpha=0.6, edgecolors='black')\n",
    "plt.scatter(X[y==1, 0], X[y==1, 1], c='red', label='Class 1', alpha=0.6, edgecolors='black')\n",
    "plt.xlabel('Feature 1')\n",
    "plt.ylabel('Feature 2')\n",
    "plt.title('Binary Classification Data')\n",
    "plt.legend()\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.show()"
108 ]
109 },
110 {
111 "cell_type": "markdown",
112 "metadata": {},
113 "source": [
    "## 3. 모델 학습"
115 ]
116 },
117 {
118 "cell_type": "code",
119 "execution_count": null,
120 "metadata": {},
121 "outputs": [],
122 "source": [
123 "# λ°μ΄ν° λΆν \n",
124 "X_train, X_test, y_train, y_test = train_test_split(\n",
125 " X, y, test_size=0.2, random_state=42\n",
126 ")\n",
127 "\n",
128 "# λ‘μ§μ€ν± νκ· λͺ¨λΈ\n",
129 "model = LogisticRegression(random_state=42)\n",
130 "model.fit(X_train, y_train)\n",
131 "\n",
132 "print(f\"Coefficients: {model.coef_}\")\n",
133 "print(f\"Intercept: {model.intercept_}\")"
134 ]
135 },
136 {
137 "cell_type": "markdown",
138 "metadata": {},
139 "source": [
    "## 4. 예측 및 평가"
141 ]
142 },
143 {
144 "cell_type": "code",
145 "execution_count": null,
146 "metadata": {},
147 "outputs": [],
148 "source": [
    "# Predict hard labels and positive-class probabilities\n",
    "y_pred = model.predict(X_test)\n",
    "y_prob = model.predict_proba(X_test)[:, 1]  # probability of class 1\n",
    "\n",
    "# Evaluation metrics (ROC-AUC uses probabilities, not hard labels)\n",
    "print(\"=== Classification Metrics ===\")\n",
    "print(f\"Accuracy: {accuracy_score(y_test, y_pred):.4f}\")\n",
    "print(f\"Precision: {precision_score(y_test, y_pred):.4f}\")\n",
    "print(f\"Recall: {recall_score(y_test, y_pred):.4f}\")\n",
    "print(f\"F1 Score: {f1_score(y_test, y_pred):.4f}\")\n",
    "print(f\"ROC-AUC: {roc_auc_score(y_test, y_prob):.4f}\")"
160 ]
161 },
162 {
163 "cell_type": "code",
164 "execution_count": null,
165 "metadata": {},
166 "outputs": [],
167 "source": [
    "# Per-class precision / recall / F1 / support summary\n",
    "print(\"\\n=== Classification Report ===\")\n",
    "print(classification_report(y_test, y_pred))"
171 ]
172 },
173 {
174 "cell_type": "code",
175 "execution_count": null,
176 "metadata": {},
177 "outputs": [],
178 "source": [
    "# Visualize the confusion matrix (rows = actual, columns = predicted)\n",
    "cm = confusion_matrix(y_test, y_pred)\n",
    "\n",
    "plt.figure(figsize=(8, 6))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n",
    "            xticklabels=['Predicted 0', 'Predicted 1'],\n",
    "            yticklabels=['Actual 0', 'Actual 1'])\n",
    "plt.title('Confusion Matrix')\n",
    "plt.ylabel('Actual')\n",
    "plt.xlabel('Predicted')\n",
    "plt.show()\n",
    "\n",
    "# Individual cells of the 2x2 matrix\n",
    "print(f\"\\nTrue Negatives: {cm[0,0]}\")\n",
    "print(f\"False Positives: {cm[0,1]}\")\n",
    "print(f\"False Negatives: {cm[1,0]}\")\n",
    "print(f\"True Positives: {cm[1,1]}\")"
195 ]
196 },
197 {
198 "cell_type": "markdown",
199 "metadata": {},
200 "source": [
    "## 5. ROC 곡선"
202 ]
203 },
204 {
205 "cell_type": "code",
206 "execution_count": null,
207 "metadata": {},
208 "outputs": [],
209 "source": [
    "# ROC curve: TPR vs FPR across all probability thresholds\n",
    "fpr, tpr, thresholds = roc_curve(y_test, y_prob)\n",
    "auc = roc_auc_score(y_test, y_prob)\n",
    "\n",
    "plt.figure(figsize=(8, 6))\n",
    "plt.plot(fpr, tpr, linewidth=2, label=f'ROC Curve (AUC = {auc:.3f})')\n",
    "plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')  # AUC = 0.5 baseline\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.title('ROC Curve')\n",
    "plt.legend()\n",
    "plt.grid(True, alpha=0.3)\n",
    "plt.show()"
223 ]
224 },
225 {
226 "cell_type": "markdown",
227 "metadata": {},
228 "source": [
    "## 6. 결정 경계 시각화"
230 ]
231 },
232 {
233 "cell_type": "code",
234 "execution_count": null,
235 "metadata": {},
236 "outputs": [],
237 "source": [
    "def plot_decision_boundary(model, X, y, title='Decision Boundary'):\n",
    "    \"\"\"Plot the model's predicted class over a dense 2-D grid covering X,\n",
    "    overlaid with the actual data points (uses the first two features).\"\"\"\n",
    "    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
    "    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
    "    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),\n",
    "                         np.linspace(y_min, y_max, 200))\n",
    "\n",
    "    # Predict a class for every grid point, then reshape back to the grid\n",
    "    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])\n",
    "    Z = Z.reshape(xx.shape)\n",
    "\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')\n",
    "    plt.scatter(X[y==0, 0], X[y==0, 1], c='blue', label='Class 0', edgecolors='black')\n",
    "    plt.scatter(X[y==1, 0], X[y==1, 1], c='red', label='Class 1', edgecolors='black')\n",
    "    plt.xlabel('Feature 1')\n",
    "    plt.ylabel('Feature 2')\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.show()\n",
    "\n",
    "plot_decision_boundary(model, X_test, y_test, 'Logistic Regression Decision Boundary')"
258 ]
259 },
260 {
261 "cell_type": "markdown",
262 "metadata": {},
263 "source": [
    "## 7. 다중 클래스 분류 (Iris Dataset)"
265 ]
266 },
267 {
268 "cell_type": "code",
269 "execution_count": null,
270 "metadata": {},
271 "outputs": [],
272 "source": [
    "# Load the built-in Iris dataset\n",
    "iris = load_iris()\n",
    "X_iris = iris.data\n",
    "y_iris = iris.target\n",
    "\n",
    "print(f\"Features: {iris.feature_names}\")\n",
    "print(f\"Classes: {iris.target_names}\")\n",
    "print(f\"Shape: {X_iris.shape}\")"
281 ]
282 },
283 {
284 "cell_type": "code",
285 "execution_count": null,
286 "metadata": {},
287 "outputs": [],
288 "source": [
289 "# λ€μ€ ν΄λμ€ λ‘μ§μ€ν± νκ·\n",
290 "X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(\n",
291 " X_iris, y_iris, test_size=0.3, random_state=42\n",
292 ")\n",
293 "\n",
294 "model_multi = LogisticRegression(multi_class='multinomial', max_iter=200, random_state=42)\n",
295 "model_multi.fit(X_train_i, y_train_i)\n",
296 "\n",
297 "y_pred_i = model_multi.predict(X_test_i)\n",
298 "print(f\"Accuracy: {accuracy_score(y_test_i, y_pred_i):.4f}\")\n",
299 "print(f\"\\nClassification Report:\")\n",
300 "print(classification_report(y_test_i, y_pred_i, target_names=iris.target_names))"
301 ]
302 },
303 {
304 "cell_type": "code",
305 "execution_count": null,
306 "metadata": {},
307 "outputs": [],
308 "source": [
    "# Confusion matrix for the 3-class Iris model (rows = actual, columns = predicted)\n",
    "cm_multi = confusion_matrix(y_test_i, y_pred_i)\n",
    "\n",
    "plt.figure(figsize=(8, 6))\n",
    "sns.heatmap(cm_multi, annot=True, fmt='d', cmap='Blues',\n",
    "            xticklabels=iris.target_names,\n",
    "            yticklabels=iris.target_names)\n",
    "plt.title('Confusion Matrix - Iris Dataset')\n",
    "plt.ylabel('Actual')\n",
    "plt.xlabel('Predicted')\n",
    "plt.show()"
320 ]
321 },
322 {
323 "cell_type": "markdown",
324 "metadata": {},
325 "source": [
    "## 정리\n",
    "\n",
    "### 핵심 개념\n",
    "- **로지스틱 회귀**: 시그모이드 함수로 확률 출력 (0~1)\n",
    "- **결정 경계**: P(y=1) = 0.5인 지점\n",
    "- **평가 지표**:\n",
    "  - Accuracy: 전체 정확도\n",
    "  - Precision: 양성 예측 중 실제 양성 비율\n",
    "  - Recall: 실제 양성 중 예측 성공 비율\n",
    "  - F1 Score: Precision과 Recall의 조화 평균\n",
    "\n",
    "### 다음 단계\n",
    "- 규제 (L1/L2 Regularization)\n",
    "- 임계값 조정\n",
    "- 불균형 데이터 처리"
341 ]
342 }
343 ],
344 "metadata": {
345 "kernelspec": {
346 "display_name": "Python 3",
347 "language": "python",
348 "name": "python3"
349 },
350 "language_info": {
351 "name": "python",
352 "version": "3.10.0"
353 }
354 },
355 "nbformat": 4,
356 "nbformat_minor": 4
357}