1"""
204. ํ์ต ๊ธฐ๋ฒ - NumPy ๋ฒ์
3
4๋ค์ํ ์ต์ ํ ๊ธฐ๋ฒ๊ณผ ์ ๊ทํ๋ฅผ NumPy๋ก ์ง์ ๊ตฌํํฉ๋๋ค.
5PyTorch ๋ฒ์ (examples/pytorch/04_training_techniques.py)๊ณผ ๋น๊ตํด ๋ณด์ธ์.
6
7์ด ํ์ผ์ด ๋ง์ง๋ง NumPy ๊ตฌํ์
๋๋ค!
8CNN๋ถํฐ๋ PyTorch๋ง ์ฌ์ฉํฉ๋๋ค.
9"""
10
11import numpy as np
12import matplotlib.pyplot as plt
13
# Opening banner for the demo script.
print("=" * 60)
print("NumPy ํ์ต ๊ธฐ๋ฒ (from scratch)")
print("=" * 60)
17
18
19# ============================================
20# 0. ๊ธฐ๋ณธ ํจ์๋ค
21# ============================================
def sigmoid(x):
    """Numerically stable logistic sigmoid; inputs are clipped to [-500, 500]."""
    z = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(x):
    """Derivative of the sigmoid, computed as s * (1 - s)."""
    s = sigmoid(x)
    return s * (1 - s)

def relu(x):
    """Rectified linear unit: element-wise max(x, 0)."""
    return np.maximum(x, 0)

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where x > 0, else 0.0."""
    positive = x > 0
    return positive.astype(float)
34
35
# ============================================
# 1. Optimizer implementations
# ============================================
print("\n[1] ์ตํฐ๋ง์ด์ ๊ตฌํ")
print("-" * 40)
41
class SGD:
    """Vanilla stochastic gradient descent: p <- p - lr * grad."""

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        """Update every entry of `params` in place using `grads` (same keys)."""
        for name in params:
            params[name] -= self.lr * grads[name]
50
class SGDMomentum:
    """SGD with classical (non-Nesterov) momentum.

    Velocity rule: v <- momentum * v + grad; update: p <- p - lr * v.
    Note the gradient enters the velocity unscaled by lr.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None  # velocities, created lazily to match param shapes

    def update(self, params, grads):
        """Apply one momentum step to `params` in place."""
        if self.v is None:
            self.v = {}
            for name, value in params.items():
                self.v[name] = np.zeros_like(value)

        for name in params:
            self.v[name] = self.momentum * self.v[name] + grads[name]
            params[name] -= self.lr * self.v[name]
65
class Adam:
    """Adam optimizer: bias-corrected first/second moment estimates."""

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.m = None  # first-moment (mean) estimates, created on first update
        self.v = None  # second-moment (uncentered variance) estimates
        self.t = 0     # timestep counter used for bias correction

    def update(self, params, grads):
        """Apply one Adam step to `params` in place."""
        if self.m is None:
            self.m = {name: np.zeros_like(p) for name, p in params.items()}
            self.v = {name: np.zeros_like(p) for name, p in params.items()}

        self.t += 1
        # Bias-correction denominators are the same for every parameter,
        # so compute them once per step.
        correction1 = 1 - self.beta1 ** self.t
        correction2 = 1 - self.beta2 ** self.t

        for name in params:
            g = grads[name]
            # Exponential moving averages of the gradient and its square.
            self.m[name] = self.beta1 * self.m[name] + (1 - self.beta1) * g
            self.v[name] = self.beta2 * self.v[name] + (1 - self.beta2) * (g ** 2)

            # Bias-corrected estimates.
            m_hat = self.m[name] / correction1
            v_hat = self.v[name] / correction2

            # Parameter update.
            params[name] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
96
print("SGD, SGDMomentum, Adam ํด๋์ค ๊ตฌํ ์๋ฃ")


# ============================================
# 2. Learning-rate schedulers
# ============================================
print("\n[2] ํ์ต๋ฅ ์ค์ผ์ค๋ฌ")
print("-" * 40)
105
class StepLR:
    """Step decay: multiply the LR by `gamma` once every `step_size` epochs."""

    def __init__(self, initial_lr, step_size=30, gamma=0.1):
        self.initial_lr = initial_lr
        self.step_size = step_size
        self.gamma = gamma

    def get_lr(self, epoch):
        """LR at `epoch`: initial_lr * gamma^(epoch // step_size)."""
        n_drops = epoch // self.step_size
        return self.initial_lr * self.gamma ** n_drops
115
class ExponentialLR:
    """Exponential decay: the LR shrinks by a factor of `gamma` each epoch."""

    def __init__(self, initial_lr, gamma=0.95):
        self.initial_lr = initial_lr
        self.gamma = gamma

    def get_lr(self, epoch):
        """LR at `epoch`: initial_lr * gamma^epoch."""
        return self.initial_lr * pow(self.gamma, epoch)
124
class CosineAnnealingLR:
    """Cosine annealing: LR follows half a cosine from initial_lr down to eta_min."""

    def __init__(self, initial_lr, T_max, eta_min=0):
        self.initial_lr = initial_lr
        self.T_max = T_max
        self.eta_min = eta_min

    def get_lr(self, epoch):
        """LR at `epoch`: eta_min + (lr0 - eta_min) * (1 + cos(pi * epoch / T_max)) / 2."""
        cosine = np.cos(np.pi * epoch / self.T_max)
        span = self.initial_lr - self.eta_min
        return self.eta_min + span * (1 + cosine) / 2
135
# Plot each scheduler's LR over 100 epochs and save the figure to disk.
epochs = np.arange(100)
schedulers = {
    'StepLR': StepLR(1.0, step_size=20, gamma=0.5),
    'ExponentialLR': ExponentialLR(1.0, gamma=0.95),
    'CosineAnnealingLR': CosineAnnealingLR(1.0, T_max=50),
}

plt.figure(figsize=(10, 5))
for name, scheduler in schedulers.items():
    # Evaluate the schedule at every epoch and print first/last LR.
    lrs = [scheduler.get_lr(e) for e in epochs]
    plt.plot(lrs, label=name)
    print(f"{name}: ์์={lrs[0]:.4f}, ๋={lrs[-1]:.4f}")

plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.title('NumPy Learning Rate Schedulers')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('numpy_lr_schedulers.png', dpi=100)
plt.close()
print("๊ทธ๋ํ ์ ์ฅ: numpy_lr_schedulers.png")
158
159
# ============================================
# 3. Dropout implementation
# ============================================
print("\n[3] Dropout")
print("-" * 40)
165
def dropout(x, p=0.5, training=True):
    """Inverted dropout.

    During training, each element of `x` is zeroed with probability `p`
    and the survivors are rescaled by 1 / (1 - p) so the expected
    activation is unchanged; at inference time the input passes through
    untouched (no rescaling needed — that is the point of the inverted
    formulation).

    Args:
        x: input array.
        p: drop probability for each element, expected in [0, 1].
        training: if False, dropout is a no-op.

    Returns:
        Array with the same shape as `x`.
    """
    if not training or p == 0:
        return x

    # Guard the division below: p == 1 drops every element, and dividing
    # by (1 - p) == 0 would otherwise produce NaNs.
    if p >= 1:
        return np.zeros_like(x)

    # Keep mask: each element survives with probability 1 - p.
    mask = (np.random.rand(*x.shape) > p).astype(float)

    # Inverted dropout: scale at train time to preserve the expectation.
    return x * mask / (1 - p)
183
# Demo: three stochastic training-mode draws, then eval mode (identity).
np.random.seed(42)
x = np.ones((1, 10))

print("์๋ ฅ:", x)
print("ํ๋ จ ๋ชจ๋ (p=0.5):")
for i in range(3):
    out = dropout(x.copy(), p=0.5, training=True)
    active = np.sum(out != 0)  # number of neurons that survived the mask
    print(f" ์๋ {i+1}: ํ์ฑ ๋ด๋ฐ = {active}/10, ์ถ๋ ฅ = {out[0][:5]}...")

print("ํ๊ฐ ๋ชจ๋:")
out = dropout(x.copy(), p=0.5, training=False)
print(f" ์ถ๋ ฅ = {out[0][:5]}...")
198
199
# ============================================
# 4. Batch Normalization implementation
# ============================================
print("\n[4] Batch Normalization")
print("-" * 40)
205
class BatchNorm:
    """Batch normalization with learnable scale (gamma) and shift (beta).

    Normalizes each feature column to zero mean / unit variance using
    batch statistics at train time and exponential running averages at
    inference time.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        self.eps = eps            # numerical-stability constant added to the variance
        self.momentum = momentum  # weight of the current batch in the running stats

        # Learnable affine parameters.
        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)

        # Running statistics used in eval mode.
        self.running_mean = np.zeros(num_features)
        self.running_var = np.ones(num_features)

        # Intermediates saved by forward() for backward().
        self.cache = None

    def forward(self, x, training=True):
        """Normalize a (batch, num_features) input.

        In training mode the current batch's mean/variance are used and the
        running averages are updated; in eval mode the stored running
        statistics are used instead.
        """
        if training:
            mean = np.mean(x, axis=0)
            var = np.var(x, axis=0)

            # Exponential moving-average update of the running statistics.
            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var
        else:
            mean = self.running_mean
            var = self.running_var

        # Normalize to zero mean / unit variance ...
        x_norm = (x - mean) / np.sqrt(var + self.eps)

        # ... then apply the learnable scale and shift.
        out = self.gamma * x_norm + self.beta

        # Cache intermediates for the backward pass.
        self.cache = (x, x_norm, mean, var)

        return out

    def backward(self, dout):
        """Backprop through batch norm; returns (dx, dgamma, dbeta).

        Follows the standard chain-rule derivation, accounting for the fact
        that the batch mean and variance each depend on every sample.
        """
        x, x_norm, mean, var = self.cache
        N = x.shape[0]

        # Gradients of the learnable parameters.
        dgamma = np.sum(dout * x_norm, axis=0)
        dbeta = np.sum(dout, axis=0)

        # Gradient w.r.t. the input: direct path through x_norm plus the
        # indirect paths through the batch variance and batch mean.
        dx_norm = dout * self.gamma
        dvar = np.sum(dx_norm * (x - mean) * (-0.5) * (var + self.eps)**(-1.5), axis=0)
        dmean = np.sum(dx_norm * (-1 / np.sqrt(var + self.eps)), axis=0) + \
                dvar * np.mean(-2 * (x - mean), axis=0)
        dx = dx_norm / np.sqrt(var + self.eps) + dvar * 2 * (x - mean) / N + dmean / N

        return dx, dgamma, dbeta
262
# Demo: a batch drawn with mean ~10 and std ~5 should come out ~N(0, 1).
np.random.seed(42)
bn = BatchNorm(num_features=4)
x_batch = np.random.randn(32, 4) * 5 + 10  # per-feature mean 10, std 5

print(f"์๋ ฅ ํต๊ณ: mean={x_batch.mean(axis=0).round(2)}, std={x_batch.std(axis=0).round(2)}")

out = bn.forward(x_batch, training=True)
print(f"์ถ๋ ฅ ํต๊ณ: mean={out.mean(axis=0).round(4)}, std={out.std(axis=0).round(4)}")
272
273
# ============================================
# 5. L2 regularization (weight decay)
# ============================================
print("\n[5] Weight Decay (L2 ์ ๊ทํ)")
print("-" * 40)
279
def compute_loss_with_l2(y_pred, y_true, weights, l2_lambda=0.01):
    """MSE loss plus an L2 weight penalty.

    Args:
        y_pred, y_true: prediction and target arrays of matching shape.
        weights: iterable of weight matrices to penalize.
        l2_lambda: regularization strength.

    Returns:
        (total_loss, data_loss, l2_loss) where
        l2_loss = (l2_lambda / 2) * sum of all squared weight entries.
    """
    # Data term: mean squared error.
    data_loss = np.mean((y_pred - y_true) ** 2)

    # Regularization term: (lambda / 2) * sum ||W||^2 over every matrix.
    sum_sq = sum(np.sum(W * W) for W in weights)
    l2_loss = (l2_lambda / 2) * sum_sq

    return data_loss + l2_loss, data_loss, l2_loss
292
# Demo: random weights/targets; sweep the regularization strength.
W1 = np.random.randn(10, 5)
W2 = np.random.randn(5, 1)
y_pred = np.random.randn(32, 1)
y_true = np.random.randn(32, 1)

for l2_lambda in [0, 0.01, 0.1]:
    total, data, reg = compute_loss_with_l2(y_pred, y_true, [W1, W2], l2_lambda)
    # NOTE(review): 'ฮป' below looks like a mojibake'd 'λ' — confirm the file encoding.
    print(f"ฮป={l2_lambda}: ์ด ์์ค={total:.4f} (๋ฐ์ดํฐ={data:.4f} + ์ ๊ทํ={reg:.4f})")
302
303
# ============================================
# 6. Optimizer comparison experiment
# ============================================
print("\n[6] ์ตํฐ๋ง์ด์ ๋น๊ต")
print("-" * 40)
309
class MLPWithOptimizer:
    """Small 2-16-1 MLP (ReLU hidden, sigmoid output) used to compare optimizers."""

    def __init__(self, optimizer):
        np.random.seed(42)  # fixed init so every optimizer starts from identical weights
        self.params = {
            'W1': np.random.randn(2, 16) * 0.5,
            'b1': np.zeros(16),
            'W2': np.random.randn(16, 1) * 0.5,
            'b2': np.zeros(1),
        }
        self.optimizer = optimizer

    def forward(self, X):
        """Forward pass; caches pre-activations/activations for backward()."""
        self.z1 = X @ self.params['W1'] + self.params['b1']
        self.a1 = relu(self.z1)
        self.z2 = self.a1 @ self.params['W2'] + self.params['b2']
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y):
        """Backprop of the MSE loss; returns gradients keyed like self.params."""
        m = X.shape[0]

        # d(MSE)/d(a2), then chain through the output sigmoid.
        dL_da2 = 2 * (self.a2 - y) / m
        dL_dz2 = dL_da2 * sigmoid_derivative(self.z2)

        grads = {
            'W2': self.a1.T @ dL_dz2,
            'b2': np.sum(dL_dz2, axis=0),
        }

        # Propagate into the hidden layer through the ReLU.
        dL_da1 = dL_dz2 @ self.params['W2'].T
        dL_dz1 = dL_da1 * relu_derivative(self.z1)

        grads['W1'] = X.T @ dL_dz1
        grads['b1'] = np.sum(dL_dz1, axis=0)

        return grads

    def train_step(self, X, y):
        """One full step: forward, MSE loss, backward, optimizer update."""
        y_pred = self.forward(X)
        loss = np.mean((y_pred - y) ** 2)
        grads = self.backward(X, y)
        self.optimizer.update(self.params, grads)
        return loss
354
# XOR dataset: not linearly separable, so the hidden layer must do real work.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float64)
y = np.array([[0], [1], [1], [0]], dtype=np.float64)

# Optimizers under comparison (each model starts from the same seeded init).
optimizers = {
    'SGD': SGD(lr=0.5),
    'SGD+Momentum': SGDMomentum(lr=0.5, momentum=0.9),
    'Adam': Adam(lr=0.05),
}

results = {}
for name, opt in optimizers.items():
    model = MLPWithOptimizer(opt)
    losses = []
    for epoch in range(500):
        loss = model.train_step(X, y)
        losses.append(loss)
    results[name] = losses
    print(f"{name}: ์ต์ข ์์ค = {losses[-1]:.6f}")

# Plot the loss curves (log scale) and save the figure to disk.
plt.figure(figsize=(10, 5))
for name, losses in results.items():
    plt.plot(losses, label=name)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('NumPy Optimizer Comparison')
plt.legend()
plt.yscale('log')
plt.grid(True, alpha=0.3)
plt.savefig('numpy_optimizer_comparison.png', dpi=100)
plt.close()
print("๊ทธ๋ํ ์ ์ฅ: numpy_optimizer_comparison.png")
389
390
# ============================================
# 7. All techniques combined
# ============================================
print("\n[7] ์ ์ฒด ๊ธฐ๋ฒ ์ ์ฉ")
print("-" * 40)
396
class FullMLP:
    """MLP combining every technique in this file: He init, BatchNorm, dropout, L2.

    NOTE(review): only forward() and loss() are implemented — this class has
    no backward pass or parameter update, so it cannot actually be trained
    as written.
    """

    def __init__(self, dropout_p=0.3, l2_lambda=0.01):
        np.random.seed(42)
        # He initialization: std = sqrt(2 / fan_in), matched to the ReLU layers.
        self.params = {
            'W1': np.random.randn(2, 32) * np.sqrt(2/2),
            'b1': np.zeros(32),
            'W2': np.random.randn(32, 16) * np.sqrt(2/32),
            'b2': np.zeros(16),
            'W3': np.random.randn(16, 1) * np.sqrt(2/16),
            'b3': np.zeros(1),
        }
        self.bn1 = BatchNorm(32)
        self.bn2 = BatchNorm(16)
        self.dropout_p = dropout_p
        self.l2_lambda = l2_lambda
        self.training = True  # toggles BatchNorm batch stats and dropout masking

    def forward(self, X):
        """Forward pass: (linear -> BN -> ReLU -> dropout) x 2 -> linear -> sigmoid."""
        # First hidden layer
        self.z1 = X @ self.params['W1'] + self.params['b1']
        self.bn1_out = self.bn1.forward(self.z1, self.training)
        self.a1 = relu(self.bn1_out)
        self.d1 = dropout(self.a1, self.dropout_p, self.training)

        # Second hidden layer
        self.z2 = self.d1 @ self.params['W2'] + self.params['b2']
        self.bn2_out = self.bn2.forward(self.z2, self.training)
        self.a2 = relu(self.bn2_out)
        self.d2 = dropout(self.a2, self.dropout_p, self.training)

        # Output layer
        self.z3 = self.d2 @ self.params['W3'] + self.params['b3']
        self.a3 = sigmoid(self.z3)

        return self.a3

    def loss(self, y_pred, y_true):
        """MSE data loss plus (l2_lambda / 2) * sum of squared weights."""
        # MSE data term
        data_loss = np.mean((y_pred - y_true) ** 2)

        # L2 penalty over the weight matrices only (biases excluded).
        l2_loss = 0
        for key in ['W1', 'W2', 'W3']:
            l2_loss += np.sum(self.params[key] ** 2)
        l2_loss *= self.l2_lambda / 2

        return data_loss + l2_loss
445
# Generate a harder dataset: points in the unit disc, labeled by radius > 0.5.
np.random.seed(42)
n_samples = 200
theta = np.random.uniform(0, 2*np.pi, n_samples)
r = np.random.uniform(0, 1, n_samples)
X_train = np.column_stack([r * np.cos(theta), r * np.sin(theta)])
y_train = (r > 0.5).astype(np.float64).reshape(-1, 1)

# "Training" loop.
# NOTE(review): this loop only calls forward() and loss() — there is no
# backward pass and `optimizer` is never used, so the parameters never
# change; the loss curve varies only because dropout is stochastic.
model = FullMLP(dropout_p=0.3, l2_lambda=0.001)
optimizer = Adam(lr=0.01)

losses = []
for epoch in range(300):
    # Forward pass and loss only (see NOTE above).
    y_pred = model.forward(X_train)
    loss = model.loss(y_pred, y_train)
    losses.append(loss)

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

plt.figure(figsize=(10, 5))
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Full MLP Training (with BN, Dropout, L2)')
plt.grid(True, alpha=0.3)
plt.savefig('numpy_full_training.png', dpi=100)
plt.close()
print("๊ทธ๋ํ ์ ์ฅ: numpy_full_training.png")
477
478
479# ============================================
480# NumPy vs PyTorch ๋น๊ต
481# ============================================
print("\n" + "=" * 60)
print("NumPy vs PyTorch ๋น๊ต")
print("=" * 60)

# Reference table comparing this file's from-scratch pieces with their
# PyTorch equivalents (runtime string kept verbatim).
comparison = """
| ํญ๋ชฉ | NumPy (์ด ์ฝ๋) | PyTorch |
|----------------|---------------------------|----------------------------|
| Optimizer | ํด๋์ค๋ก ์ง์ ๊ตฌํ | torch.optim.Adam ๋ฑ |
| Scheduler | ํจ์๋ก ์ง์ ๊ณ์ฐ | lr_scheduler ๋ชจ๋ |
| Dropout | ๋ง์คํฌ ร ์ค์ผ์ผ ์ง์ ๊ณ์ฐ | nn.Dropout |
| BatchNorm | ํ๊ท /๋ถ์ฐ ์ง์ ๊ณ์ฐ | nn.BatchNorm1d |
| Weight Decay | ์์ค์ ์ง์ ์ถ๊ฐ | optimizer์ weight_decay |

NumPy ๊ตฌํ์ ๊ฐ์น:
1. Adam์ m, v ์๋ฐ์ดํธ ์๋ฆฌ ์ดํด
2. BatchNorm์ ์ด๋ ํ๊ท ์๋ ๋ฐฉ์
3. Dropout์ ์ญ๋๋กญ์์(inverted) ์ดํด
4. ์ ๊ทํ ํญ์ด ์์ค์ ๋ฏธ์น๋ ์ํฅ

์ดํ CNN๋ถํฐ๋ PyTorch๋ง ์ฌ์ฉ:
- ํฉ์ฑ๊ณฑ ์ฐ์ฐ์ NumPy ๊ตฌํ์ ๋นํจ์จ์ 
- GPU ๊ฐ์์ด ํ์์ 
- ๋ณต์กํ ์ํคํ์ฒ ๊ด๋ฆฌ ์ด๋ ค์
"""
print(comparison)
507
508
509# ============================================
510# ์ ๋ฆฌ
511# ============================================
print("\n" + "=" * 60)
print("NumPy ํ์ต ๊ธฐ๋ฒ ์ ๋ฆฌ")
print("=" * 60)

# Closing summary printed for the reader (runtime string kept verbatim).
summary = """
๊ตฌํํ ๊ฒ๋ค:
1. SGD, Momentum, Adam ์ตํฐ๋ง์ด์ 
2. StepLR, ExponentialLR, CosineAnnealingLR ์ค์ผ์ค๋ฌ
3. Dropout (์ญ๋๋กญ์์ ํฌํจ)
4. Batch Normalization (์์ ํ + ์ญ์ ํ)
5. L2 ์ ๊ทํ (Weight Decay)

ํต์ฌ ํฌ์ธํธ:
- Adam: ฮฒโ=0.9, ฮฒโ=0.999๋ก 1์ฐจ/2์ฐจ ๋ชจ๋ฉํธ ์ถ์ 
- Dropout: training ๋ชจ๋์์๋ง ์ ์ฉ, ์ค์ผ์ผ ๋ณด์ ํ์
- BatchNorm: ํ๋ จ ์ ๋ฐฐ์น ํต๊ณ, ์ถ๋ก ์ ์ด๋ ํ๊ท ์ฌ์ฉ
- L2: ๊ฐ์ค์น ํฌ๊ธฐ ์ ํ์ผ๋ก ์ผ๋ฐํ ํฅ์

๋ค์ ๋จ๊ณ:
- CNN (05_CNN_๊ธฐ์ด.md)๋ถํฐ๋ PyTorch๋ง ์ฌ์ฉ
- NumPy๋ก ์ถฉ๋ถํ ์๋ฆฌ๋ฅผ ์ดํดํ์!
"""
print(summary)
print("=" * 60)