"""05. CNN basics - PyTorch version.

Implements convolutional neural networks (CNNs) in PyTorch and runs
MNIST and CIFAR-10 classification demos, including feature-map and
filter visualization, data augmentation, and model save/load.
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
 16print("=" * 60)
 17print("PyTorch CNN ๊ธฐ์ดˆ")
 18print("=" * 60)
 19
 20device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 21print(f"์‚ฌ์šฉ ์žฅ์น˜: {device}")
 22
 23
 24# ============================================
 25# 1. ํ•ฉ์„ฑ๊ณฑ ์—ฐ์‚ฐ ์ดํ•ด
 26# ============================================
 27print("\n[1] ํ•ฉ์„ฑ๊ณฑ ์—ฐ์‚ฐ ์ดํ•ด")
 28print("-" * 40)
 29
 30# Conv2d ๊ธฐ๋ณธ
 31conv = nn.Conv2d(
 32    in_channels=1,    # ์ž…๋ ฅ ์ฑ„๋„
 33    out_channels=3,   # ํ•„ํ„ฐ ๊ฐœ์ˆ˜ (์ถœ๋ ฅ ์ฑ„๋„)
 34    kernel_size=3,    # ํ•„ํ„ฐ ํฌ๊ธฐ
 35    stride=1,         # ์ด๋™ ๊ฐ„๊ฒฉ
 36    padding=1         # ํŒจ๋”ฉ
 37)
 38
 39print(f"Conv2d ํŒŒ๋ผ๋ฏธํ„ฐ:")
 40print(f"  weight shape: {conv.weight.shape}")  # (out, in, H, W)
 41print(f"  bias shape: {conv.bias.shape}")       # (out,)
 42
 43# ์ž…๋ ฅ/์ถœ๋ ฅ ํ™•์ธ
 44x = torch.randn(1, 1, 8, 8)  # (batch, channel, H, W)
 45out = conv(x)
 46print(f"\n์ž…๋ ฅ: {x.shape} โ†’ ์ถœ๋ ฅ: {out.shape}")
 47
 48
 49# ์ถœ๋ ฅ ํฌ๊ธฐ ๊ณ„์‚ฐ
 50def calc_output_size(input_size, kernel_size, stride=1, padding=0):
 51    return (input_size - kernel_size + 2 * padding) // stride + 1
 52
 53print("\n์ถœ๋ ฅ ํฌ๊ธฐ ๊ณต์‹: (์ž…๋ ฅ - ์ปค๋„ + 2ร—ํŒจ๋”ฉ) / ์ŠคํŠธ๋ผ์ด๋“œ + 1")
 54for k, s, p in [(3, 1, 0), (3, 1, 1), (3, 2, 0), (5, 1, 2)]:
 55    out_size = calc_output_size(32, k, s, p)
 56    print(f"  ์ž…๋ ฅ=32, kernel={k}, stride={s}, pad={p} โ†’ ์ถœ๋ ฅ={out_size}")
 57
 58
 59# ============================================
 60# 2. ํ’€๋ง ์—ฐ์‚ฐ
 61# ============================================
 62print("\n[2] ํ’€๋ง ์—ฐ์‚ฐ")
 63print("-" * 40)
 64
 65# MaxPool2d
 66pool = nn.MaxPool2d(kernel_size=2, stride=2)
 67
 68x = torch.tensor([[[[1, 2, 3, 4],
 69                    [5, 6, 7, 8],
 70                    [9, 10, 11, 12],
 71                    [13, 14, 15, 16]]]], dtype=torch.float32)
 72
 73print(f"์ž…๋ ฅ:\n{x.squeeze()}")
 74print(f"\nMaxPool2d(2,2) ์ถœ๋ ฅ:\n{pool(x).squeeze()}")
 75
 76# AvgPool2d
 77avg_pool = nn.AvgPool2d(2, 2)
 78print(f"\nAvgPool2d(2,2) ์ถœ๋ ฅ:\n{avg_pool(x).squeeze()}")
 79
 80
 81# ============================================
 82# 3. MNIST CNN
 83# ============================================
 84print("\n[3] MNIST CNN")
 85print("-" * 40)
 86
 87class MNISTNet(nn.Module):
 88    """MNIST์šฉ ๊ฐ„๋‹จํ•œ CNN"""
 89    def __init__(self):
 90        super().__init__()
 91        # Conv ๋ธ”๋ก 1: 1โ†’32 ์ฑ„๋„, 28โ†’14
 92        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
 93        self.bn1 = nn.BatchNorm2d(32)
 94        self.pool1 = nn.MaxPool2d(2, 2)
 95
 96        # Conv ๋ธ”๋ก 2: 32โ†’64 ์ฑ„๋„, 14โ†’7
 97        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
 98        self.bn2 = nn.BatchNorm2d(64)
 99        self.pool2 = nn.MaxPool2d(2, 2)
100
101        # FC ๋ธ”๋ก
102        self.fc1 = nn.Linear(64 * 7 * 7, 128)
103        self.dropout = nn.Dropout(0.5)
104        self.fc2 = nn.Linear(128, 10)
105
106    def forward(self, x):
107        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
108        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
109        x = x.view(-1, 64 * 7 * 7)
110        x = F.relu(self.fc1(x))
111        x = self.dropout(x)
112        x = self.fc2(x)
113        return x
114
model = MNISTNet()
print(model)

# Total parameter count across all layers.
param_sizes = [p.numel() for p in model.parameters()]
total = sum(param_sizes)
print(f"\n์ด ํŒŒ๋ผ๋ฏธํ„ฐ: {total:,}")
# ============================================
# 4. MNIST training
# ============================================
print("\n[4] MNIST ํ•™์Šต")
print("-" * 40)

# Data pipeline: Normalize uses MNIST's conventional global mean/std
# (0.1307, 0.3081) — TODO confirm against the dataset docs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

try:
    train_data = datasets.MNIST('data', train=True, download=True, transform=transform)
    test_data = datasets.MNIST('data', train=False, transform=transform)

    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=1000)

    print(f"ํ›ˆ๋ จ ๋ฐ์ดํ„ฐ: {len(train_data)} ์ƒ˜ํ”Œ")
    print(f"ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ: {len(test_data)} ์ƒ˜ํ”Œ")

    # Model, loss, optimizer
    model = MNISTNet().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    epochs = 3
    train_losses = []

    for epoch in range(epochs):
        model.train()  # enable dropout / batch-norm training behavior
        epoch_loss = 0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Standard step: clear grads, backprop, update weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate running loss and accuracy statistics.
            epoch_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        acc = 100. * correct / total
        avg_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_loss)
        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Acc={acc:.2f}%")

    # Evaluation on the held-out test set
    model.eval()  # disable dropout; batch norm uses running statistics
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    print(f"\nํ…Œ์ŠคํŠธ ์ •ํ™•๋„: {100. * correct / total:.2f}%")

except Exception as e:
    # NOTE(review): deliberately broad best-effort fallback for offline runs;
    # any failure (typically a download error) drops to a dummy forward pass.
    print(f"MNIST ๋กœ๋“œ ์‹คํŒจ (์˜คํ”„๋ผ์ธ?): {e}")
    print("๋ฐ๋ชจ ๋ชจ๋“œ๋กœ ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค.")

    # Smoke test with dummy data (shape check only; model stays in train mode).
    x_dummy = torch.randn(4, 1, 28, 28)
    model = MNISTNet()
    out = model(x_dummy)
    print(f"๋”๋ฏธ ์ž…๋ ฅ: {x_dummy.shape} โ†’ ์ถœ๋ ฅ: {out.shape}")
# ============================================
# 5. Feature-map visualization
# ============================================
print("\n[5] ํŠน์ง• ๋งต ์‹œ๊ฐํ™”", "-" * 40, sep="\n")
def visualize_feature_maps(model, image, layer_name='conv1'):
    """Capture one conv layer's activations for *image* and save a 4x4 grid.

    A forward hook on ``layer_name`` records the layer output during a
    single no-grad forward pass; up to 16 maps are written to
    ``cnn_feature_maps.png``.
    """
    model.eval()

    # Capture the intermediate output via a forward hook.
    captured = {}

    def _grab(module, inputs, output):
        # Detach so plotting never holds onto the autograd graph.
        captured['output'] = output.detach()

    handle = getattr(model, layer_name).register_forward_hook(_grab)

    with torch.no_grad():
        model(image)

    handle.remove()
    feature_maps = captured['output']

    # Plot the first (up to) 16 channels of the first sample.
    n_maps = min(16, feature_maps.shape[1])
    fig, axes = plt.subplots(4, 4, figsize=(8, 8))

    for idx, ax in enumerate(axes.flat):
        if idx < n_maps:
            ax.imshow(feature_maps[0, idx].cpu().numpy(), cmap='viridis')
        ax.axis('off')

    plt.suptitle(f'{layer_name} Feature Maps')
    plt.tight_layout()
    plt.savefig('cnn_feature_maps.png', dpi=100)
    plt.close()
    print("ํŠน์ง• ๋งต ์ €์žฅ: cnn_feature_maps.png")
243# ์‹œ๊ฐํ™” (ํ•™์Šต๋œ ๋ชจ๋ธ์ด ์žˆ๋Š” ๊ฒฝ์šฐ)
244try:
245    sample_image = train_data[0][0].unsqueeze(0).to(device)
246    visualize_feature_maps(model, sample_image, 'conv1')
247except:
248    print("์‹œ๊ฐํ™” ์Šคํ‚ต (๋ฐ์ดํ„ฐ ์—†์Œ)")
249
250
# ============================================
# 6. Filter visualization
# ============================================
print("\n[6] ํ•„ํ„ฐ ์‹œ๊ฐํ™”", "-" * 40, sep="\n")
def visualize_filters(model, layer_name='conv1'):
    """Save the first input channel of up to 16 conv filters as a 4x4 grid."""
    weights = getattr(model, layer_name).weight.detach().cpu()

    # Plot at most the first 16 filters.
    n_filters = min(16, weights.shape[0])
    fig, axes = plt.subplots(4, 4, figsize=(8, 8))

    for idx, ax in enumerate(axes.flat):
        if idx < n_filters:
            # Show only the filter's first input channel.
            ax.imshow(weights[idx, 0].numpy(), cmap='gray')
        ax.axis('off')

    plt.suptitle(f'{layer_name} Filters')
    plt.tight_layout()
    plt.savefig('cnn_filters.png', dpi=100)
    plt.close()
    print("ํ•„ํ„ฐ ์ €์žฅ: cnn_filters.png")
try:
    visualize_filters(model, 'conv1')
except Exception:
    # Narrowed from a bare `except:` so Ctrl-C still interrupts the script.
    print("ํ•„ํ„ฐ ์‹œ๊ฐํ™” ์Šคํ‚ต")
# ============================================
# 7. CIFAR-10 CNN
# ============================================
print("\n[7] CIFAR-10 CNN", "-" * 40, sep="\n")
class CIFAR10Net(nn.Module):
    """VGG-style CNN for CIFAR-10: three double-conv blocks + 2-layer head.

    Input:  (N, 3, 32, 32) RGB images.
    Output: (N, 10) class logits.
    """

    def __init__(self):
        super().__init__()
        # Build the feature extractor as three repeated double-conv blocks:
        # 3->64 (32->16), 64->128 (16->8), 128->256 (8->4).
        # NOTE: the module order/count is identical to the hand-written
        # version, so state_dict keys (features.0, features.1, ...) match.
        layers = []
        for in_ch, out_ch in [(3, 64), (64, 128), (128, 256)]:
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),   # halves the spatial size each block
                nn.Dropout2d(0.25),
            ]
        self.features = nn.Sequential(*layers)

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        return self.classifier(self.features(x))
cifar_model = CIFAR10Net()
print(cifar_model)

# Total parameter count.
total = sum(p.numel() for p in cifar_model.parameters())
print(f"\n์ด ํŒŒ๋ผ๋ฏธํ„ฐ: {total:,}")

# Smoke test: a random batch of two 32x32 RGB images.
x_test = torch.randn(2, 3, 32, 32)
out = cifar_model(x_test)
print(f"์ž…๋ ฅ: {x_test.shape} โ†’ ์ถœ๋ ฅ: {out.shape}")
# ============================================
# 8. Data augmentation
# ============================================
print("\n[8] ๋ฐ์ดํ„ฐ ์ฆ๊ฐ•", "-" * 40, sep="\n")

# CIFAR-10 per-channel mean/std shared by both pipelines.
_cifar_mean = (0.4914, 0.4822, 0.4465)
_cifar_std = (0.2470, 0.2435, 0.2616)

# Training pipeline: random crop/flip/jitter for regularization.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_cifar_mean, _cifar_std),
])

# Test pipeline: deterministic — only tensor conversion and normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_cifar_mean, _cifar_std),
])

print("ํ›ˆ๋ จ ๋ณ€ํ™˜: RandomCrop, Flip, ColorJitter, Normalize")
print("ํ…Œ์ŠคํŠธ ๋ณ€ํ™˜: ToTensor, Normalize")
# ============================================
# 9. Model save/load
# ============================================
print("\n[9] ๋ชจ๋ธ ์ €์žฅ/๋กœ๋“œ", "-" * 40, sep="\n")

# Persist only the weights (state_dict), not the pickled module object.
checkpoint_path = 'cifar_cnn.pth'
torch.save(cifar_model.state_dict(), checkpoint_path)
print("๋ชจ๋ธ ์ €์žฅ: cifar_cnn.pth")

# Reload into a fresh instance; weights_only=True avoids arbitrary unpickling.
loaded_model = CIFAR10Net()
state = torch.load(checkpoint_path, weights_only=True)
loaded_model.load_state_dict(state)
loaded_model.eval()
print("๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ")
# ============================================
# Recap
# ============================================
closing_banner = "=" * 60
print("\n" + closing_banner)
print("CNN ๊ธฐ์ดˆ ์ •๋ฆฌ")
print(closing_banner)

summary = """
CNN ๊ตฌ์„ฑ์š”์†Œ:
1. Conv2d: ์ง€์—ญ ํŒจํ„ด ์ถ”์ถœ
2. BatchNorm2d: ํ•™์Šต ์•ˆ์ •ํ™”
3. ReLU: ๋น„์„ ํ˜•์„ฑ
4. MaxPool2d: ๊ณต๊ฐ„ ์ถ•์†Œ
5. Dropout2d: ๊ณผ์ ํ•ฉ ๋ฐฉ์ง€
6. Flatten + Linear: ๋ถ„๋ฅ˜

์ถœ๋ ฅ ํฌ๊ธฐ ๊ณต์‹:
    output = (input - kernel + 2*padding) / stride + 1

์ผ๋ฐ˜์ ์ธ ํŒจํ„ด:
    Conv โ†’ BN โ†’ ReLU โ†’ Pool (๋ฐ˜๋ณต) โ†’ Flatten โ†’ FC

๊ถŒ์žฅ ์„ค์ •:
- kernel_size=3, padding=1 (same padding)
- ์ฑ„๋„ ์ฆ๊ฐ€: 64 โ†’ 128 โ†’ 256
- Pool๋กœ ๊ณต๊ฐ„ ์ถ•์†Œ
- FC ์•ž์— Dropout
"""
print(summary)
print(closing_banner)