# 04_training_techniques.py
  1"""
  204. ํ•™์Šต ๊ธฐ๋ฒ• - PyTorch ๋ฒ„์ „
  3
  4๋‹ค์–‘ํ•œ ์ตœ์ ํ™” ๊ธฐ๋ฒ•๊ณผ ์ •๊ทœํ™”๋ฅผ PyTorch๋กœ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
  5NumPy ๋ฒ„์ „(examples/numpy/04_training_techniques.py)๊ณผ ๋น„๊ตํ•ด ๋ณด์„ธ์š”.
  6"""
  7
  8import torch
  9import torch.nn as nn
 10import torch.nn.functional as F
 11from torch.utils.data import DataLoader, TensorDataset
 12import numpy as np
 13import matplotlib.pyplot as plt
 14
 15print("=" * 60)
 16print("PyTorch ํ•™์Šต ๊ธฐ๋ฒ•")
 17print("=" * 60)
 18
 19
 20# ============================================
 21# 1. ์˜ตํ‹ฐ๋งˆ์ด์ € ๋น„๊ต
 22# ============================================
 23print("\n[1] ์˜ตํ‹ฐ๋งˆ์ด์ € ๋น„๊ต")
 24print("-" * 40)
 25
 26# ๊ฐ„๋‹จํ•œ ๋ชจ๋ธ ์ •์˜
 27class SimpleNet(nn.Module):
 28    def __init__(self):
 29        super().__init__()
 30        self.fc1 = nn.Linear(2, 16)
 31        self.fc2 = nn.Linear(16, 1)
 32
 33    def forward(self, x):
 34        x = F.relu(self.fc1(x))
 35        x = torch.sigmoid(self.fc2(x))
 36        return x
 37
 38# XOR ๋ฐ์ดํ„ฐ
 39X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
 40y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)
 41
 42def train_with_optimizer(optimizer_class, **kwargs):
 43    """์ฃผ์–ด์ง„ ์˜ตํ‹ฐ๋งˆ์ด์ €๋กœ ํ•™์Šต"""
 44    torch.manual_seed(42)
 45    model = SimpleNet()
 46    optimizer = optimizer_class(model.parameters(), **kwargs)
 47    criterion = nn.BCELoss()
 48
 49    losses = []
 50    for epoch in range(500):
 51        pred = model(X)
 52        loss = criterion(pred, y)
 53        losses.append(loss.item())
 54
 55        optimizer.zero_grad()
 56        loss.backward()
 57        optimizer.step()
 58
 59    return losses
 60
 61# ๋‹ค์–‘ํ•œ ์˜ตํ‹ฐ๋งˆ์ด์ € ํ…Œ์ŠคํŠธ
 62optimizers = {
 63    'SGD (lr=0.5)': (torch.optim.SGD, {'lr': 0.5}),
 64    'SGD+Momentum': (torch.optim.SGD, {'lr': 0.5, 'momentum': 0.9}),
 65    'Adam': (torch.optim.Adam, {'lr': 0.01}),
 66    'RMSprop': (torch.optim.RMSprop, {'lr': 0.01}),
 67}
 68
 69results = {}
 70for name, (opt_class, params) in optimizers.items():
 71    losses = train_with_optimizer(opt_class, **params)
 72    results[name] = losses
 73    print(f"{name}: ์ตœ์ข… ์†์‹ค = {losses[-1]:.6f}")
 74
 75# ์‹œ๊ฐํ™”
 76plt.figure(figsize=(10, 5))
 77for name, losses in results.items():
 78    plt.plot(losses, label=name)
 79plt.xlabel('Epoch')
 80plt.ylabel('Loss')
 81plt.title('Optimizer Comparison')
 82plt.legend()
 83plt.yscale('log')
 84plt.grid(True, alpha=0.3)
 85plt.savefig('optimizer_comparison.png', dpi=100)
 86plt.close()
 87print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: optimizer_comparison.png")
 88
 89
 90# ============================================
 91# 2. ํ•™์Šต๋ฅ  ์Šค์ผ€์ค„๋Ÿฌ
 92# ============================================
 93print("\n[2] ํ•™์Šต๋ฅ  ์Šค์ผ€์ค„๋Ÿฌ")
 94print("-" * 40)
 95
 96# ์Šค์ผ€์ค„๋Ÿฌ ํ…Œ์ŠคํŠธ
 97def test_scheduler(scheduler_class, **kwargs):
 98    torch.manual_seed(42)
 99    model = SimpleNet()
100    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
101    scheduler = scheduler_class(optimizer, **kwargs)
102
103    lrs = []
104    for epoch in range(100):
105        lrs.append(optimizer.param_groups[0]['lr'])
106        scheduler.step()
107
108    return lrs
109
schedulers = {
    'StepLR': (torch.optim.lr_scheduler.StepLR, {'step_size': 20, 'gamma': 0.5}),
    'ExponentialLR': (torch.optim.lr_scheduler.ExponentialLR, {'gamma': 0.95}),
    'CosineAnnealingLR': (torch.optim.lr_scheduler.CosineAnnealingLR, {'T_max': 50}),
}

plt.figure(figsize=(10, 5))
for name, (sched_class, params) in schedulers.items():
    lrs = test_scheduler(sched_class, **params)
    plt.plot(lrs, label=name)
    print(f"{name}: ์‹œ์ž‘ {lrs[0]:.4f} โ†’ ๋ {lrs[-1]:.4f}")

plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.title('Learning Rate Schedulers')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('lr_schedulers.png', dpi=100)
plt.close()
print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: lr_schedulers.png")
131
132# ============================================
133# 3. Dropout
134# ============================================
135print("\n[3] Dropout")
136print("-" * 40)
137
138class NetWithDropout(nn.Module):
139    def __init__(self, dropout_p=0.5):
140        super().__init__()
141        self.fc1 = nn.Linear(2, 32)
142        self.dropout = nn.Dropout(p=dropout_p)
143        self.fc2 = nn.Linear(32, 1)
144
145    def forward(self, x):
146        x = F.relu(self.fc1(x))
147        x = self.dropout(x)
148        x = torch.sigmoid(self.fc2(x))
149        return x
150
# Observe the dropout effect directly on the hidden layer
model = NetWithDropout(dropout_p=0.5)
x_test = torch.randn(1, 2)

model.train()
print("ํ›ˆ๋ จ ๋ชจ๋“œ (Dropout ํ™œ์„ฑ):")
for i in range(3):
    # Run the hidden layer by hand so we can count surviving neurons.
    out = model.fc1(x_test)
    out = F.relu(out)
    out = model.dropout(out)
    print(f"  ์‹œ๋„ {i+1}: ํ™œ์„ฑ ๋‰ด๋Ÿฐ = {(out != 0).sum().item()}/32")

model.eval()
print("\nํ‰๊ฐ€ ๋ชจ๋“œ (Dropout ๋น„ํ™œ์„ฑ):")
out = model.fc1(x_test)
out = F.relu(out)
out = model.dropout(out)  # identity in eval mode: everything passes through
print(f"  ํ™œ์„ฑ ๋‰ด๋Ÿฐ = {(out != 0).sum().item()}/32")
170
171# ============================================
172# 4. Batch Normalization
173# ============================================
174print("\n[4] Batch Normalization")
175print("-" * 40)
176
177class NetWithBatchNorm(nn.Module):
178    def __init__(self):
179        super().__init__()
180        self.fc1 = nn.Linear(2, 16)
181        self.bn1 = nn.BatchNorm1d(16)
182        self.fc2 = nn.Linear(16, 1)
183
184    def forward(self, x):
185        x = self.fc1(x)
186        x = self.bn1(x)
187        x = F.relu(x)
188        x = torch.sigmoid(self.fc2(x))
189        return x
190
bn_model = NetWithBatchNorm()
# Note: placeholder-free f-strings below had extraneous f prefixes (F541).
print("BatchNorm1d ํŒŒ๋ผ๋ฏธํ„ฐ:")
print(f"  weight (ฮณ): {bn_model.bn1.weight.shape}")
print(f"  bias (ฮฒ): {bn_model.bn1.bias.shape}")
print(f"  running_mean: {bn_model.bn1.running_mean.shape}")
print(f"  running_var: {bn_model.bn1.running_var.shape}")

# Train vs. eval mode: batch statistics vs. running statistics
x_batch = torch.randn(32, 2)

bn_model.train()
out_train = bn_model.fc1(x_batch)
out_train = bn_model.bn1(out_train)
print("\nํ›ˆ๋ จ ๋ชจ๋“œ - ์ถœ๋ ฅ ํ†ต๊ณ„:")
print(f"  mean: {out_train.mean(dim=0)[:3].tolist()}")
print(f"  std: {out_train.std(dim=0)[:3].tolist()}")

bn_model.eval()
out_eval = bn_model.fc1(x_batch)
out_eval = bn_model.bn1(out_eval)
print("ํ‰๊ฐ€ ๋ชจ๋“œ - ์ถœ๋ ฅ ํ†ต๊ณ„:")
print(f"  mean: {out_eval.mean(dim=0)[:3].tolist()}")
214
215# ============================================
216# 5. Weight Decay (L2 ์ •๊ทœํ™”)
217# ============================================
218print("\n[5] Weight Decay")
219print("-" * 40)
220
221def train_with_weight_decay(weight_decay):
222    torch.manual_seed(42)
223    model = SimpleNet()
224    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=weight_decay)
225    criterion = nn.BCELoss()
226
227    for epoch in range(500):
228        pred = model(X)
229        loss = criterion(pred, y)
230        optimizer.zero_grad()
231        loss.backward()
232        optimizer.step()
233
234    # ๊ฐ€์ค‘์น˜ ํฌ๊ธฐ ํ™•์ธ
235    weight_norm = sum(p.norm().item() for p in model.parameters())
236    return loss.item(), weight_norm
237
238for wd in [0, 0.01, 0.1]:
239    loss, w_norm = train_with_weight_decay(wd)
240    print(f"Weight Decay={wd}: ์†์‹ค={loss:.4f}, ๊ฐ€์ค‘์น˜ norm={w_norm:.4f}")
241
242
243# ============================================
244# 6. ์กฐ๊ธฐ ์ข…๋ฃŒ (Early Stopping)
245# ============================================
246print("\n[6] ์กฐ๊ธฐ ์ข…๋ฃŒ")
247print("-" * 40)
248
249class EarlyStopping:
250    def __init__(self, patience=10, min_delta=0):
251        self.patience = patience
252        self.min_delta = min_delta
253        self.counter = 0
254        self.best_loss = None
255        self.early_stop = False
256        self.best_model = None
257
258    def __call__(self, val_loss, model):
259        if self.best_loss is None:
260            self.best_loss = val_loss
261            self.best_model = model.state_dict().copy()
262        elif val_loss > self.best_loss - self.min_delta:
263            self.counter += 1
264            if self.counter >= self.patience:
265                self.early_stop = True
266        else:
267            self.best_loss = val_loss
268            self.best_model = model.state_dict().copy()
269            self.counter = 0
270
# Demo with a simulated validation-loss curve
early_stopping = EarlyStopping(patience=5)
val_losses = [1.0, 0.9, 0.85, 0.8, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87]

model = SimpleNet()
for epoch, val_loss in enumerate(val_losses):
    early_stopping(val_loss, model)
    status = "STOP" if early_stopping.early_stop else f"patience={early_stopping.counter}"
    print(f"Epoch {epoch+1}: val_loss={val_loss:.2f}, {status}")
    if early_stopping.early_stop:
        break
283
284# ============================================
285# 7. ์ „์ฒด ํ•™์Šต ์˜ˆ์ œ
286# ============================================
287print("\n[7] ์ „์ฒด ํ•™์Šต ์˜ˆ์ œ")
288print("-" * 40)
289
290# ๋” ํฐ ๋ฐ์ดํ„ฐ์…‹ ์ƒ์„ฑ
291np.random.seed(42)
292n_samples = 200
293
294# ์›ํ˜• ๋ฐ์ดํ„ฐ (๋น„์„ ํ˜• ๋ฌธ์ œ)
295theta = np.random.uniform(0, 2*np.pi, n_samples)
296r = np.random.uniform(0, 1, n_samples)
297X_train = np.column_stack([r * np.cos(theta), r * np.sin(theta)])
298y_train = (r > 0.5).astype(np.float32)
299
300X_train = torch.tensor(X_train, dtype=torch.float32)
301y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
302
303# ๊ฒ€์ฆ ๋ฐ์ดํ„ฐ
304X_val = X_train[:40]
305y_val = y_train[:40]
306X_train = X_train[40:]
307y_train = y_train[40:]
308
309# DataLoader
310train_dataset = TensorDataset(X_train, y_train)
311train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
312
class FullModel(nn.Module):
    """A 2-32-16-1 MLP combining batch norm and dropout regularization."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(2, 32)
        self.bn1 = nn.BatchNorm1d(32)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(32, 16)
        self.bn2 = nn.BatchNorm1d(16)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(16, 1)

    def forward(self, x):
        """Map (N, 2) inputs to (N, 1) probabilities."""
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x
332# ๋ชจ๋ธ ์ดˆ๊ธฐํ™”
333torch.manual_seed(42)
334model = FullModel()
335criterion = nn.BCELoss()
336optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
337scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5)
338early_stopping = EarlyStopping(patience=20)
339
340# ํ•™์Šต
341train_losses = []
342val_losses = []
343
344for epoch in range(200):
345    # ํ›ˆ๋ จ
346    model.train()
347    epoch_loss = 0
348    for X_batch, y_batch in train_loader:
349        pred = model(X_batch)
350        loss = criterion(pred, y_batch)
351
352        optimizer.zero_grad()
353        loss.backward()
354        optimizer.step()
355        epoch_loss += loss.item()
356
357    train_losses.append(epoch_loss / len(train_loader))
358
359    # ๊ฒ€์ฆ
360    model.eval()
361    with torch.no_grad():
362        val_pred = model(X_val)
363        val_loss = criterion(val_pred, y_val).item()
364        val_losses.append(val_loss)
365
366    # ์Šค์ผ€์ค„๋Ÿฌ ์—…๋ฐ์ดํŠธ
367    scheduler.step(val_loss)
368
369    # ์กฐ๊ธฐ ์ข…๋ฃŒ ์ฒดํฌ
370    early_stopping(val_loss, model)
371
372    if (epoch + 1) % 40 == 0:
373        lr = optimizer.param_groups[0]['lr']
374        print(f"Epoch {epoch+1}: train={train_losses[-1]:.4f}, val={val_loss:.4f}, lr={lr:.6f}")
375
376    if early_stopping.early_stop:
377        print(f"์กฐ๊ธฐ ์ข…๋ฃŒ at epoch {epoch+1}")
378        break
379
380# ์ตœ๊ณ  ๋ชจ๋ธ ๋ณต์›
381if early_stopping.best_model:
382    model.load_state_dict(early_stopping.best_model)
383
# Visualize the loss curves
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training with Regularization')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('full_training.png', dpi=100)
plt.close()
print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: full_training.png")
397
398# ============================================
399# ์ •๋ฆฌ
400# ============================================
401print("\n" + "=" * 60)
402print("ํ•™์Šต ๊ธฐ๋ฒ• ์ •๋ฆฌ")
403print("=" * 60)
404
405summary = """
406๊ถŒ์žฅ ๊ธฐ๋ณธ ์„ค์ •:
407    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
408    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)
409    EarlyStopping(patience=10)
410
411์ •๊ทœํ™” ์กฐํ•ฉ:
412    - Dropout (0.2~0.5): ๊ณผ์ ํ•ฉ ๋ฐฉ์ง€
413    - BatchNorm: ํ•™์Šต ์•ˆ์ •ํ™”
414    - Weight Decay (1e-4~1e-2): ๊ฐ€์ค‘์น˜ ํฌ๊ธฐ ์ œํ•œ
415
416ํ•™์Šต ๋ฃจํ”„ ์ฒดํฌ๋ฆฌ์ŠคํŠธ:
417    1. model.train() / model.eval() ๋ชจ๋“œ ์ „ํ™˜
418    2. optimizer.zero_grad() ํ˜ธ์ถœ
419    3. loss.backward()
420    4. optimizer.step()
421    5. scheduler.step() (์—ํญ ๋)
422    6. EarlyStopping ์ฒดํฌ
423"""
424print(summary)
425print("=" * 60)