"""
04. Training techniques - NumPy version

Implements various optimization and regularization techniques directly in NumPy.
Compare with the PyTorch version (examples/pytorch/04_training_techniques.py).

This is the last NumPy implementation!
From CNNs onward, only PyTorch is used.
"""

import numpy as np
import matplotlib.pyplot as plt

print("=" * 60)
print("NumPy ํ•™์Šต ๊ธฐ๋ฒ• (from scratch)")
print("=" * 60)
 18
 19# ============================================
 20# 0. ๊ธฐ๋ณธ ํ•จ์ˆ˜๋“ค
 21# ============================================
 22def sigmoid(x):
 23    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
 24
 25def sigmoid_derivative(x):
 26    s = sigmoid(x)
 27    return s * (1 - s)
 28
 29def relu(x):
 30    return np.maximum(0, x)
 31
 32def relu_derivative(x):
 33    return (x > 0).astype(float)
 34
 35
 36# ============================================
 37# 1. ์˜ตํ‹ฐ๋งˆ์ด์ € ๊ตฌํ˜„
 38# ============================================
 39print("\n[1] ์˜ตํ‹ฐ๋งˆ์ด์ € ๊ตฌํ˜„")
 40print("-" * 40)
 41
 42class SGD:
 43    """๊ธฐ๋ณธ ํ™•๋ฅ ์  ๊ฒฝ์‚ฌ ํ•˜๊ฐ•๋ฒ•"""
 44    def __init__(self, lr=0.01):
 45        self.lr = lr
 46
 47    def update(self, params, grads):
 48        for key in params:
 49            params[key] -= self.lr * grads[key]
 50
 51class SGDMomentum:
 52    """๋ชจ๋ฉ˜ํ…€์„ ์‚ฌ์šฉํ•œ SGD"""
 53    def __init__(self, lr=0.01, momentum=0.9):
 54        self.lr = lr
 55        self.momentum = momentum
 56        self.v = None
 57
 58    def update(self, params, grads):
 59        if self.v is None:
 60            self.v = {key: np.zeros_like(val) for key, val in params.items()}
 61
 62        for key in params:
 63            self.v[key] = self.momentum * self.v[key] + grads[key]
 64            params[key] -= self.lr * self.v[key]
 65
 66class Adam:
 67    """Adam ์˜ตํ‹ฐ๋งˆ์ด์ €"""
 68    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
 69        self.lr = lr
 70        self.beta1 = beta1
 71        self.beta2 = beta2
 72        self.eps = eps
 73        self.m = None
 74        self.v = None
 75        self.t = 0
 76
 77    def update(self, params, grads):
 78        if self.m is None:
 79            self.m = {key: np.zeros_like(val) for key, val in params.items()}
 80            self.v = {key: np.zeros_like(val) for key, val in params.items()}
 81
 82        self.t += 1
 83
 84        for key in params:
 85            # 1์ฐจ ๋ชจ๋ฉ˜ํŠธ (ํ‰๊ท )
 86            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * grads[key]
 87            # 2์ฐจ ๋ชจ๋ฉ˜ํŠธ (๋ถ„์‚ฐ)
 88            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (grads[key] ** 2)
 89
 90            # ํŽธํ–ฅ ๋ณด์ •
 91            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
 92            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
 93
 94            # ์—…๋ฐ์ดํŠธ
 95            params[key] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
 96
 97print("SGD, SGDMomentum, Adam ํด๋ž˜์Šค ๊ตฌํ˜„ ์™„๋ฃŒ")
 98
 99
# ============================================
# 2. Learning-rate schedulers
# ============================================
print("\n[2] ํ•™์Šต๋ฅ  ์Šค์ผ€์ค„๋Ÿฌ")
print("-" * 40)

class StepLR:
    """Step decay: multiply the LR by `gamma` once every `step_size` epochs."""
    def __init__(self, initial_lr, step_size=30, gamma=0.1):
        self.initial_lr = initial_lr
        self.step_size = step_size
        self.gamma = gamma

    def get_lr(self, epoch):
        """Return the learning rate for the given (0-based) epoch."""
        return self.initial_lr * (self.gamma ** (epoch // self.step_size))
115
class ExponentialLR:
    """Exponential decay: LR shrinks by a factor `gamma` every epoch."""
    def __init__(self, initial_lr, gamma=0.95):
        self.initial_lr = initial_lr
        self.gamma = gamma

    def get_lr(self, epoch):
        """Return initial_lr * gamma**epoch for the given (0-based) epoch."""
        return self.initial_lr * (self.gamma ** epoch)
124
class CosineAnnealingLR:
    """Cosine annealing: LR follows a half cosine from initial_lr down to eta_min
    over T_max epochs."""
    def __init__(self, initial_lr, T_max, eta_min=0):
        self.initial_lr = initial_lr
        self.T_max = T_max   # number of epochs for one half-period
        self.eta_min = eta_min

    def get_lr(self, epoch):
        """Return the annealed learning rate for the given (0-based) epoch."""
        return self.eta_min + (self.initial_lr - self.eta_min) * \
               (1 + np.cos(np.pi * epoch / self.T_max)) / 2
135
136# ์‹œ๊ฐํ™”
137epochs = np.arange(100)
138schedulers = {
139    'StepLR': StepLR(1.0, step_size=20, gamma=0.5),
140    'ExponentialLR': ExponentialLR(1.0, gamma=0.95),
141    'CosineAnnealingLR': CosineAnnealingLR(1.0, T_max=50),
142}
143
144plt.figure(figsize=(10, 5))
145for name, scheduler in schedulers.items():
146    lrs = [scheduler.get_lr(e) for e in epochs]
147    plt.plot(lrs, label=name)
148    print(f"{name}: ์‹œ์ž‘={lrs[0]:.4f}, ๋={lrs[-1]:.4f}")
149
150plt.xlabel('Epoch')
151plt.ylabel('Learning Rate')
152plt.title('NumPy Learning Rate Schedulers')
153plt.legend()
154plt.grid(True, alpha=0.3)
155plt.savefig('numpy_lr_schedulers.png', dpi=100)
156plt.close()
157print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: numpy_lr_schedulers.png")
158
159
# ============================================
# 3. Dropout
# ============================================
print("\n[3] Dropout")
print("-" * 40)

def dropout(x, p=0.5, training=True):
    """Inverted dropout.

    Args:
        x: input array
        p: probability of dropping each unit
        training: if False (inference), the input passes through unchanged

    Returns:
        x with a random fraction p of entries zeroed and the survivors scaled
        by 1/(1-p), so the expected activation matches inference time.
    """
    if not training or p == 0:
        return x

    # Keep mask: entries are 1 with probability 1-p
    mask = (np.random.rand(*x.shape) > p).astype(float)

    # Inverted dropout: rescale at train time so no scaling is needed at test time
    return x * mask / (1 - p)
183
# Demonstration: dropout is stochastic in training mode, identity in eval mode.
np.random.seed(42)
x = np.ones((1, 10))

print("์ž…๋ ฅ:", x)
print("ํ›ˆ๋ จ ๋ชจ๋“œ (p=0.5):")
for i in range(3):
    out = dropout(x.copy(), p=0.5, training=True)
    active = np.sum(out != 0)
    print(f"  ์‹œ๋„ {i+1}: ํ™œ์„ฑ ๋‰ด๋Ÿฐ = {active}/10, ์ถœ๋ ฅ = {out[0][:5]}...")

print("ํ‰๊ฐ€ ๋ชจ๋“œ:")
out = dropout(x.copy(), p=0.5, training=False)
print(f"  ์ถœ๋ ฅ = {out[0][:5]}...")
198
199
# ============================================
# 4. Batch Normalization
# ============================================
print("\n[4] Batch Normalization")
print("-" * 40)

class BatchNorm:
    """Batch normalization over the batch axis (axis=0) for 2-D inputs (N, F)."""
    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        self.eps = eps
        self.momentum = momentum

        # Learnable scale and shift parameters
        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)

        # Running statistics, used at inference time
        self.running_mean = np.zeros(num_features)
        self.running_var = np.ones(num_features)

        # Cache of the last forward pass, consumed by backward()
        self.cache = None

    def forward(self, x, training=True):
        """Normalize x using batch statistics (training) or running stats (eval)."""
        if training:
            mean = np.mean(x, axis=0)
            # NOTE(review): np.var is the biased estimator (ddof=0); PyTorch tracks
            # the unbiased variance in its running stats — confirm intended.
            var = np.var(x, axis=0)

            # Exponential moving average of the batch statistics
            self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean
            self.running_var = (1 - self.momentum) * self.running_var + self.momentum * var
        else:
            mean = self.running_mean
            var = self.running_var

        # Normalize to zero mean / unit variance per feature
        x_norm = (x - mean) / np.sqrt(var + self.eps)

        # Scale and shift with the learnable parameters
        out = self.gamma * x_norm + self.beta

        # Save intermediates for the backward pass
        self.cache = (x, x_norm, mean, var)

        return out

    def backward(self, dout):
        """Backprop through batch norm; returns (dx, dgamma, dbeta)."""
        x, x_norm, mean, var = self.cache
        N = x.shape[0]

        # Parameter gradients
        dgamma = np.sum(dout * x_norm, axis=0)
        dbeta = np.sum(dout, axis=0)

        # Input gradient (standard chain-rule derivation through mean/var)
        dx_norm = dout * self.gamma
        dvar = np.sum(dx_norm * (x - mean) * (-0.5) * (var + self.eps)**(-1.5), axis=0)
        dmean = np.sum(dx_norm * (-1 / np.sqrt(var + self.eps)), axis=0) + \
                dvar * np.mean(-2 * (x - mean), axis=0)
        dx = dx_norm / np.sqrt(var + self.eps) + dvar * 2 * (x - mean) / N + dmean / N

        return dx, dgamma, dbeta
262
# Sanity check: after BN, each feature has ~0 mean and ~1 std
np.random.seed(42)
bn = BatchNorm(num_features=4)
x_batch = np.random.randn(32, 4) * 5 + 10  # mean 10, std 5

print(f"์ž…๋ ฅ ํ†ต๊ณ„: mean={x_batch.mean(axis=0).round(2)}, std={x_batch.std(axis=0).round(2)}")

out = bn.forward(x_batch, training=True)
print(f"์ถœ๋ ฅ ํ†ต๊ณ„: mean={out.mean(axis=0).round(4)}, std={out.std(axis=0).round(4)}")
272
273
# ============================================
# 5. L2 regularization (weight decay)
# ============================================
print("\n[5] Weight Decay (L2 ์ •๊ทœํ™”)")
print("-" * 40)

def compute_loss_with_l2(y_pred, y_true, weights, l2_lambda=0.01):
    """MSE loss plus an L2 penalty on the given weight matrices.

    Args:
        y_pred, y_true: arrays of the same shape
        weights: iterable of weight arrays to penalize
        l2_lambda: regularization strength

    Returns:
        (total_loss, data_loss, l2_loss)
    """
    # Data term (MSE)
    data_loss = np.mean((y_pred - y_true) ** 2)

    # Regularization term: (lambda / 2) * sum of squared weights
    l2_loss = 0
    for W in weights:
        l2_loss += np.sum(W ** 2)
    l2_loss *= l2_lambda / 2

    return data_loss + l2_loss, data_loss, l2_loss
292
# Example: how the regularization strength changes the total loss
W1 = np.random.randn(10, 5)
W2 = np.random.randn(5, 1)
y_pred = np.random.randn(32, 1)
y_true = np.random.randn(32, 1)

for l2_lambda in [0, 0.01, 0.1]:
    total, data, reg = compute_loss_with_l2(y_pred, y_true, [W1, W2], l2_lambda)
    print(f"ฮป={l2_lambda}: ์ด ์†์‹ค={total:.4f} (๋ฐ์ดํ„ฐ={data:.4f} + ์ •๊ทœํ™”={reg:.4f})")
302
303
# ============================================
# 6. Optimizer comparison experiment
# ============================================
print("\n[6] ์˜ตํ‹ฐ๋งˆ์ด์ € ๋น„๊ต")
print("-" * 40)

class MLPWithOptimizer:
    """Small 2-16-1 MLP used to compare optimizers on the XOR problem."""
    def __init__(self, optimizer):
        np.random.seed(42)  # identical init for every optimizer being compared
        self.params = {
            'W1': np.random.randn(2, 16) * 0.5,
            'b1': np.zeros(16),
            'W2': np.random.randn(16, 1) * 0.5,
            'b2': np.zeros(1),
        }
        self.optimizer = optimizer

    def forward(self, X):
        """Forward pass; caches pre-activations/activations for backward()."""
        self.z1 = X @ self.params['W1'] + self.params['b1']
        self.a1 = relu(self.z1)
        self.z2 = self.a1 @ self.params['W2'] + self.params['b2']
        self.a2 = sigmoid(self.z2)
        return self.a2

    def backward(self, X, y):
        """Gradients of the MSE loss w.r.t. all parameters (dict keyed like params)."""
        m = X.shape[0]

        dL_da2 = 2 * (self.a2 - y) / m
        dL_dz2 = dL_da2 * sigmoid_derivative(self.z2)

        grads = {
            'W2': self.a1.T @ dL_dz2,
            'b2': np.sum(dL_dz2, axis=0),
        }

        dL_da1 = dL_dz2 @ self.params['W2'].T
        dL_dz1 = dL_da1 * relu_derivative(self.z1)

        grads['W1'] = X.T @ dL_dz1
        grads['b1'] = np.sum(dL_dz1, axis=0)

        return grads

    def train_step(self, X, y):
        """One forward/backward/update cycle; returns the MSE loss before the update."""
        y_pred = self.forward(X)
        loss = np.mean((y_pred - y) ** 2)
        grads = self.backward(X, y)
        self.optimizer.update(self.params, grads)
        return loss
354
355# XOR ๋ฐ์ดํ„ฐ
356X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float64)
357y = np.array([[0], [1], [1], [0]], dtype=np.float64)
358
359# ์˜ตํ‹ฐ๋งˆ์ด์ € ๋น„๊ต
360optimizers = {
361    'SGD': SGD(lr=0.5),
362    'SGD+Momentum': SGDMomentum(lr=0.5, momentum=0.9),
363    'Adam': Adam(lr=0.05),
364}
365
366results = {}
367for name, opt in optimizers.items():
368    model = MLPWithOptimizer(opt)
369    losses = []
370    for epoch in range(500):
371        loss = model.train_step(X, y)
372        losses.append(loss)
373    results[name] = losses
374    print(f"{name}: ์ตœ์ข… ์†์‹ค = {losses[-1]:.6f}")
375
376# ์‹œ๊ฐํ™”
377plt.figure(figsize=(10, 5))
378for name, losses in results.items():
379    plt.plot(losses, label=name)
380plt.xlabel('Epoch')
381plt.ylabel('Loss')
382plt.title('NumPy Optimizer Comparison')
383plt.legend()
384plt.yscale('log')
385plt.grid(True, alpha=0.3)
386plt.savefig('numpy_optimizer_comparison.png', dpi=100)
387plt.close()
388print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: numpy_optimizer_comparison.png")
389
390
# ============================================
# 7. All techniques combined
# ============================================
print("\n[7] ์ „์ฒด ๊ธฐ๋ฒ• ์ ์šฉ")
print("-" * 40)

class FullMLP:
    """2-32-16-1 MLP combining He init, BatchNorm, Dropout and an L2 loss term.

    NOTE(review): this class implements only forward() and loss(); there is no
    backward(), so it cannot be trained as written — confirm whether the demo
    below is intentionally forward-only.
    """
    def __init__(self, dropout_p=0.3, l2_lambda=0.01):
        np.random.seed(42)
        # He initialization: std = sqrt(2 / fan_in) per layer
        self.params = {
            'W1': np.random.randn(2, 32) * np.sqrt(2/2),
            'b1': np.zeros(32),
            'W2': np.random.randn(32, 16) * np.sqrt(2/32),
            'b2': np.zeros(16),
            'W3': np.random.randn(16, 1) * np.sqrt(2/16),
            'b3': np.zeros(1),
        }
        self.bn1 = BatchNorm(32)
        self.bn2 = BatchNorm(16)
        self.dropout_p = dropout_p
        self.l2_lambda = l2_lambda
        self.training = True  # toggles BN statistics and dropout behavior

    def forward(self, X):
        """Forward pass: (linear -> BN -> ReLU -> dropout) x2 -> linear -> sigmoid."""
        # First layer
        self.z1 = X @ self.params['W1'] + self.params['b1']
        self.bn1_out = self.bn1.forward(self.z1, self.training)
        self.a1 = relu(self.bn1_out)
        self.d1 = dropout(self.a1, self.dropout_p, self.training)

        # Second layer
        self.z2 = self.d1 @ self.params['W2'] + self.params['b2']
        self.bn2_out = self.bn2.forward(self.z2, self.training)
        self.a2 = relu(self.bn2_out)
        self.d2 = dropout(self.a2, self.dropout_p, self.training)

        # Output layer
        self.z3 = self.d2 @ self.params['W3'] + self.params['b3']
        self.a3 = sigmoid(self.z3)

        return self.a3

    def loss(self, y_pred, y_true):
        """MSE data loss plus an L2 penalty on the weight matrices (not biases)."""
        data_loss = np.mean((y_pred - y_true) ** 2)

        # L2 regularization over W1..W3 only
        l2_loss = 0
        for key in ['W1', 'W2', 'W3']:
            l2_loss += np.sum(self.params[key] ** 2)
        l2_loss *= self.l2_lambda / 2

        return data_loss + l2_loss
445
# Generate a harder dataset: points sampled in a disk, labeled by radius
np.random.seed(42)
n_samples = 200
theta = np.random.uniform(0, 2*np.pi, n_samples)
r = np.random.uniform(0, 1, n_samples)
X_train = np.column_stack([r * np.cos(theta), r * np.sin(theta)])
y_train = (r > 0.5).astype(np.float64).reshape(-1, 1)

# "Training" loop.
# NOTE(review): no backward pass or optimizer.update() is ever called here, so
# the parameters never change and the loss curve only reflects dropout/BN noise
# (FullMLP defines no backward()). Confirm whether this is intentional.
model = FullMLP(dropout_p=0.3, l2_lambda=0.001)
optimizer = Adam(lr=0.01)

losses = []
for epoch in range(300):
    # Forward pass only — see NOTE above
    y_pred = model.forward(X_train)
    loss = model.loss(y_pred, y_train)
    losses.append(loss)

    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}: Loss = {loss:.6f}")

plt.figure(figsize=(10, 5))
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Full MLP Training (with BN, Dropout, L2)')
plt.grid(True, alpha=0.3)
plt.savefig('numpy_full_training.png', dpi=100)
plt.close()
print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: numpy_full_training.png")
477
478
# ============================================
# NumPy vs PyTorch comparison
# ============================================
print("\n" + "=" * 60)
print("NumPy vs PyTorch ๋น„๊ต")
print("=" * 60)

comparison = """
| ํ•ญ๋ชฉ           | NumPy (์ด ์ฝ”๋“œ)            | PyTorch                    |
|----------------|---------------------------|----------------------------|
| Optimizer      | ํด๋ž˜์Šค๋กœ ์ง์ ‘ ๊ตฌํ˜„          | torch.optim.Adam ๋“ฑ        |
| Scheduler      | ํ•จ์ˆ˜๋กœ ์ง์ ‘ ๊ณ„์‚ฐ            | lr_scheduler ๋ชจ๋“ˆ          |
| Dropout        | ๋งˆ์Šคํฌ ร— ์Šค์ผ€์ผ ์ง์ ‘ ๊ณ„์‚ฐ   | nn.Dropout                 |
| BatchNorm      | ํ‰๊ท /๋ถ„์‚ฐ ์ง์ ‘ ๊ณ„์‚ฐ         | nn.BatchNorm1d             |
| Weight Decay   | ์†์‹ค์— ์ง์ ‘ ์ถ”๊ฐ€            | optimizer์˜ weight_decay   |

NumPy ๊ตฌํ˜„์˜ ๊ฐ€์น˜:
1. Adam์˜ m, v ์—…๋ฐ์ดํŠธ ์›๋ฆฌ ์ดํ•ด
2. BatchNorm์˜ ์ด๋™ ํ‰๊ท  ์ž‘๋™ ๋ฐฉ์‹
3. Dropout์˜ ์—ญ๋“œ๋กญ์•„์›ƒ(inverted) ์ดํ•ด
4. ์ •๊ทœํ™” ํ•ญ์ด ์†์‹ค์— ๋ฏธ์น˜๋Š” ์˜ํ–ฅ

์ดํ›„ CNN๋ถ€ํ„ฐ๋Š” PyTorch๋งŒ ์‚ฌ์šฉ:
- ํ•ฉ์„ฑ๊ณฑ ์—ฐ์‚ฐ์˜ NumPy ๊ตฌํ˜„์€ ๋น„ํšจ์œจ์ 
- GPU ๊ฐ€์†์ด ํ•„์ˆ˜์ 
- ๋ณต์žกํ•œ ์•„ํ‚คํ…์ฒ˜ ๊ด€๋ฆฌ ์–ด๋ ค์›€
"""
print(comparison)
507
508
# ============================================
# Summary
# ============================================
print("\n" + "=" * 60)
print("NumPy ํ•™์Šต ๊ธฐ๋ฒ• ์ •๋ฆฌ")
print("=" * 60)

summary = """
๊ตฌํ˜„ํ•œ ๊ฒƒ๋“ค:
1. SGD, Momentum, Adam ์˜ตํ‹ฐ๋งˆ์ด์ €
2. StepLR, ExponentialLR, CosineAnnealingLR ์Šค์ผ€์ค„๋Ÿฌ
3. Dropout (์—ญ๋“œ๋กญ์•„์›ƒ ํฌํ•จ)
4. Batch Normalization (์ˆœ์ „ํŒŒ + ์—ญ์ „ํŒŒ)
5. L2 ์ •๊ทœํ™” (Weight Decay)

ํ•ต์‹ฌ ํฌ์ธํŠธ:
- Adam: ฮฒโ‚=0.9, ฮฒโ‚‚=0.999๋กœ 1์ฐจ/2์ฐจ ๋ชจ๋ฉ˜ํŠธ ์ถ”์ •
- Dropout: training ๋ชจ๋“œ์—์„œ๋งŒ ์ ์šฉ, ์Šค์ผ€์ผ ๋ณด์ • ํ•„์ˆ˜
- BatchNorm: ํ›ˆ๋ จ ์‹œ ๋ฐฐ์น˜ ํ†ต๊ณ„, ์ถ”๋ก  ์‹œ ์ด๋™ ํ‰๊ท  ์‚ฌ์šฉ
- L2: ๊ฐ€์ค‘์น˜ ํฌ๊ธฐ ์ œํ•œ์œผ๋กœ ์ผ๋ฐ˜ํ™” ํ–ฅ์ƒ

๋‹ค์Œ ๋‹จ๊ณ„:
- CNN (05_CNN_๊ธฐ์ดˆ.md)๋ถ€ํ„ฐ๋Š” PyTorch๋งŒ ์‚ฌ์šฉ
- NumPy๋กœ ์ถฉ๋ถ„ํžˆ ์›๋ฆฌ๋ฅผ ์ดํ•ดํ–ˆ์Œ!
"""
print(summary)
print("=" * 60)