05_conv_understanding.py

  1"""
  205. 합성곱 이해 - NumPy 버전 (교육용)
  3
  4합성곱 연산의 원리를 NumPy로 이해합니다.
  5실제 CNN 학습에는 PyTorch를 사용하세요!
  6
  7이 파일은 합성곱이 어떻게 동작하는지 이해하기 위한 것입니다.
  8"""
  9
 10import numpy as np
 11import matplotlib.pyplot as plt
 12
# Title banner for the console output of this walkthrough.
print("=" * 60)
print("NumPy 합성곱 이해 (교육용)")
print("=" * 60)


# ============================================
# 1. Basic 2D convolution
# ============================================
print("\n[1] 기본 2D 합성곱")
print("-" * 40)
 23
 24def conv2d_basic(image, kernel):
 25    """
 26    가장 기본적인 2D 합성곱 구현
 27
 28    Args:
 29        image: 2D 배열 (H, W)
 30        kernel: 2D 배열 (kH, kW)
 31
 32    Returns:
 33        출력 (H-kH+1, W-kW+1)
 34    """
 35    h, w = image.shape
 36    kh, kw = kernel.shape
 37    oh, ow = h - kh + 1, w - kw + 1
 38
 39    output = np.zeros((oh, ow))
 40
 41    for i in range(oh):
 42        for j in range(ow):
 43            # 영역 추출
 44            region = image[i:i+kh, j:j+kw]
 45            # 요소별 곱셈 후 합산
 46            output[i, j] = np.sum(region * kernel)
 47
 48    return output
 49
# Demo: run conv2d_basic on a small hand-written example.
image = np.array([
    [1, 2, 3, 0],
    [0, 1, 2, 3],
    [3, 0, 1, 2],
    [2, 3, 0, 1]
], dtype=float)

# With this kernel each output value is image[i, j] - image[i+1, j+1].
kernel = np.array([
    [1, 0],
    [0, -1]
], dtype=float)

output = conv2d_basic(image, kernel)
print(f"입력 이미지 (4×4):\n{image}")
print(f"\n커널 (2×2):\n{kernel}")
# NOTE: every row of this image is the previous row shifted right by one, so
# all nine output values are 0 for this particular image/kernel pair.
print(f"\n출력 (3×3):\n{output}")
# Spell out the top-left output element term by term.
print(f"\n예시 계산 (좌상단):")
print(f"  {image[0,0]}×{kernel[0,0]} + {image[0,1]}×{kernel[0,1]} + {image[1,0]}×{kernel[1,0]} + {image[1,1]}×{kernel[1,1]}")
print(f"  = 1×1 + 2×0 + 0×0 + 1×(-1) = 0")


# ============================================
# 2. Padding and stride
# ============================================
print("\n[2] 패딩과 스트라이드")
print("-" * 40)
 77
 78def conv2d_with_padding(image, kernel, padding=0, stride=1):
 79    """패딩과 스트라이드를 지원하는 합성곱"""
 80    # 패딩 적용
 81    if padding > 0:
 82        image = np.pad(image, padding, mode='constant', constant_values=0)
 83
 84    h, w = image.shape
 85    kh, kw = kernel.shape
 86    oh = (h - kh) // stride + 1
 87    ow = (w - kw) // stride + 1
 88
 89    output = np.zeros((oh, ow))
 90
 91    for i in range(oh):
 92        for j in range(ow):
 93            si, sj = i * stride, j * stride
 94            region = image[si:si+kh, sj:sj+kw]
 95            output[i, j] = np.sum(region * kernel)
 96
 97    return output
 98
# Demo: print the output shape for each padding/stride combination.
image = np.ones((4, 4))
kernel = np.ones((3, 3))

print("입력: 4×4, 커널: 3×3")
for p in [0, 1]:
    for s in [1, 2]:
        out = conv2d_with_padding(image, kernel, padding=p, stride=s)
        print(f"  padding={p}, stride={s} → 출력: {out.shape}")


# ============================================
# 3. Edge-detection filters
# ============================================
print("\n[3] 에지 검출 필터")
print("-" * 40)

# Build the sample image
def create_sample_image():
    """Return an 8×8 float image of zeros with a 4×4 block of ones in the centre."""
    canvas = np.zeros((8, 8))
    centre = slice(2, 6)  # rows/columns 2..5
    canvas[centre, centre] = 1
    return canvas
122
image = create_sample_image()

# Edge-detection kernels.
# sobel_x: negative left column, positive right column (difference across columns).
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]])

# sobel_y: negative top row, positive bottom row (difference across rows).
sobel_y = np.array([[-1, -2, -1],
                    [ 0,  0,  0],
                    [ 1,  2,  1]])

# laplacian: the four direct neighbours minus four times the centre.
laplacian = np.array([[0,  1, 0],
                      [1, -4, 1],
                      [0,  1, 0]])

# Apply the filters; padding=1 with a 3×3 kernel keeps the 8×8 size.
edge_x = conv2d_with_padding(image, sobel_x, padding=1)
edge_y = conv2d_with_padding(image, sobel_y, padding=1)
edge_laplace = conv2d_with_padding(image, laplacian, padding=1)

# Visualisation: top row shows the input and two kernels, bottom row the
# three filter responses.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes[0, 0].imshow(image, cmap='gray')
axes[0, 0].set_title('Original')
axes[0, 1].imshow(sobel_x, cmap='RdBu')
axes[0, 1].set_title('Sobel X Filter')
axes[0, 2].imshow(sobel_y, cmap='RdBu')
axes[0, 2].set_title('Sobel Y Filter')
axes[1, 0].imshow(edge_x, cmap='gray')
axes[1, 0].set_title('Sobel X Edge')
axes[1, 1].imshow(edge_y, cmap='gray')
axes[1, 1].set_title('Sobel Y Edge')
axes[1, 2].imshow(edge_laplace, cmap='gray')
axes[1, 2].set_title('Laplacian Edge')

# Hide axis ticks and frames on every panel.
for ax in axes.flat:
    ax.axis('off')

# Write the figure to the current working directory, then close it.
plt.tight_layout()
plt.savefig('numpy_edge_detection.png', dpi=100)
plt.close()
print("에지 검출 저장: numpy_edge_detection.png")


# ============================================
# 4. Pooling operations
# ============================================
print("\n[4] 풀링 연산")
print("-" * 40)
172
def max_pool2d(image, pool_size=2, stride=2):
    """
    Max pooling over a 2D array.

    Args:
        image: 2D array of shape (H, W)
        pool_size: side length of the square pooling window
        stride: step between consecutive windows

    Returns:
        float array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)
        holding the largest value found in each window
    """
    rows, cols = image.shape
    out_rows = (rows - pool_size) // stride + 1
    out_cols = (cols - pool_size) // stride + 1

    pooled = np.zeros((out_rows, out_cols))

    for r in range(out_rows):
        top = r * stride
        for c in range(out_cols):
            left = c * stride
            window = image[top:top + pool_size, left:left + pool_size]
            pooled[r, c] = window.max()

    return pooled
188
def avg_pool2d(image, pool_size=2, stride=2):
    """
    Average pooling over a 2D array.

    Args:
        image: 2D array of shape (H, W)
        pool_size: side length of the square pooling window
        stride: step between consecutive windows

    Returns:
        float array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)
        holding the mean of each window
    """
    n_rows, n_cols = image.shape
    pooled_rows = (n_rows - pool_size) // stride + 1
    pooled_cols = (n_cols - pool_size) // stride + 1

    averages = np.zeros((pooled_rows, pooled_cols))

    for out_r in range(pooled_rows):
        r0 = out_r * stride
        r1 = r0 + pool_size
        for out_c in range(pooled_cols):
            c0 = out_c * stride
            patch = image[r0:r1, c0:c0 + pool_size]
            averages[out_r, out_c] = patch.mean()

    return averages
204
# Demo: pool a 4×4 image holding 1..16 with the default 2×2 window, stride 2.
image = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16]
], dtype=float)

print(f"입력:\n{image}")
print(f"\nMax Pooling (2×2):\n{max_pool2d(image)}")
print(f"\nAvg Pooling (2×2):\n{avg_pool2d(image)}")


# ============================================
# 5. Multi-channel convolution
# ============================================
print("\n[5] 다채널 합성곱")
print("-" * 40)
223
def conv2d_multichannel(image, kernels, bias=0):
    """
    Multi-channel convolution (e.g. an RGB image) producing one output map.

    Each channel is filtered with its own 2D kernel via conv2d_basic and the
    per-channel results are summed, then the bias is added.

    Args:
        image: (C, H, W) - C input channels
        kernels: (C, kH, kW) - one kernel per input channel
        bias: value added to every output element

    Returns:
        float array of shape (H-kH+1, W-kW+1)

    Raises:
        ValueError: if image and kernels do not have the same channel count.
    """
    c, h, w = image.shape
    kc, kh, kw = kernels.shape
    # Fail fast on a channel mismatch: otherwise extra kernel channels would
    # be silently ignored and missing ones would surface as an IndexError.
    if kc != c:
        raise ValueError(
            f"channel mismatch: image has {c} channel(s), kernels has {kc}"
        )

    output = np.zeros((h - kh + 1, w - kw + 1))

    # Filter every channel separately and accumulate the results.
    for channel_image, channel_kernel in zip(image, kernels):
        output += conv2d_basic(channel_image, channel_kernel)

    return output + bias
247
# Example with an RGB-like image (random values, unseeded).
rgb_image = np.random.rand(3, 8, 8)  # (C, H, W)
kernels = np.random.rand(3, 3, 3)    # (C, kH, kW)

output = conv2d_multichannel(rgb_image, kernels)
print(f"입력: {rgb_image.shape} (3채널)")
print(f"커널: {kernels.shape} (채널별 3×3)")
print(f"출력: {output.shape}")


# ============================================
# 6. Applying several filters
# ============================================
print("\n[6] 여러 필터 적용")
print("-" * 40)
263
def conv2d_layer(image, filters, biases):
    """
    Simulate a convolutional layer: one output channel per filter.

    Args:
        image: (C_in, H, W)
        filters: (C_out, C_in, kH, kW)
        biases: (C_out,) - one bias per filter

    Returns:
        float array of shape (C_out, H-kH+1, W-kW+1)
    """
    n_filters, _, k_rows, k_cols = filters.shape
    _, in_rows, in_cols = image.shape

    feature_maps = np.zeros(
        (n_filters, in_rows - k_rows + 1, in_cols - k_cols + 1)
    )

    # Each filter spans all input channels and fills one output channel.
    for idx, bank in enumerate(filters):
        feature_maps[idx] = conv2d_multichannel(image, bank, biases[idx])

    return feature_maps
286
# Example: 3-channel input → 8-channel output (random values, unseeded).
image = np.random.rand(3, 16, 16)
filters = np.random.rand(8, 3, 3, 3)  # 8 filters
biases = np.zeros(8)

output = conv2d_layer(image, filters, biases)
print(f"입력: {image.shape}")
print(f"필터: {filters.shape}")
print(f"출력: {output.shape}")


# ============================================
# 7. CNN forward-pass simulation
# ============================================
print("\n[7] CNN 순전파 시뮬레이션")
print("-" * 40)
303
def relu(x):
    """Return the element-wise maximum of 0 and x, so negative entries become 0."""
    return np.maximum(0, x)
306
def simple_cnn_forward(image):
    """
    Forward pass of a tiny CNN using freshly sampled random weights.

    Pipeline: Conv 3×3 (C_in → 2 channels) → ReLU → MaxPool 2×2, stride 2
    → Flatten → fully connected layer with 10 outputs.

    For the reference input (1, 8, 8) the shapes are
    (2, 6, 6) → (2, 3, 3) → (18,) → (10,). Layer sizes are derived from the
    actual input, so other channel counts and spatial sizes work as well
    (H and W must be at least 4 so the pooling window fits).

    The shape after each stage is printed. Weights are drawn from np.random
    on every call, so results vary between calls unless the caller seeds it.

    Args:
        image: (C_in, H, W)

    Returns:
        1D array with 10 values
    """
    # Conv1: C_in → 2 channels, 3×3 kernels. The input-channel count comes
    # from the image rather than being fixed at 1.
    in_channels = image.shape[0]
    filters1 = np.random.randn(2, in_channels, 3, 3) * 0.5
    biases1 = np.zeros(2)

    conv1_out = conv2d_layer(image, filters1, biases1)
    relu1_out = relu(conv1_out)
    print(f"  Conv1 후: {relu1_out.shape}")

    # MaxPool 2×2, stride 2, applied per channel. Stacking the pooled maps
    # lets the output shape follow the input instead of assuming (2, 3, 3).
    pool_out = np.stack([max_pool2d(channel, 2, 2) for channel in relu1_out])
    print(f"  Pool 후: {pool_out.shape}")

    # Flatten
    flat = pool_out.flatten()
    print(f"  Flatten: {flat.shape}")

    # FC: the weight matrix width matches the flattened feature count
    # (18 for the reference input).
    fc_weights = np.random.randn(10, flat.size) * 0.5
    fc_bias = np.zeros(10)
    output = fc_weights @ flat + fc_bias
    print(f"  FC 출력: {output.shape}")

    return output
338
# Demo: push one random single-channel 8×8 image through the toy CNN.
image = np.random.rand(1, 8, 8)
print(f"입력: {image.shape}")
output = simple_cnn_forward(image)


# ============================================
# Why use PyTorch instead?
# ============================================
print("\n" + "=" * 60)
print("NumPy CNN의 한계")
print("=" * 60)

# User-facing text printed verbatim: limitations of the NumPy implementation
# (speed, backpropagation, memory, features) and reasons to use PyTorch.
limitations = """
NumPy 구현의 문제점:

1. 속도
   - 순수 Python 루프는 매우 느림
   - 28×28 MNIST도 수천 배 느림
   - GPU 가속 불가능

2. 역전파
   - 합성곱 역전파 구현이 복잡
   - im2col 등 최적화 필요
   - 실수하기 쉬움

3. 메모리
   - 비효율적인 메모리 사용
   - 배치 처리 어려움

4. 기능
   - BatchNorm, Dropout 구현 복잡
   - 다양한 층/연산 부족

PyTorch 사용 이유:
   ✓ cuDNN으로 최적화된 합성곱
   ✓ 자동 미분 (역전파 자동)
   ✓ GPU 지원
   ✓ 풍부한 레이어/함수 제공
"""
print(limitations)
380
381
# ============================================
# Summary
# ============================================
print("=" * 60)
print("합성곱 핵심 정리")
print("=" * 60)

# User-facing recap printed verbatim. The output-size formula carries an
# explicit floor (⌊ ⌋) so it agrees with the integer division used by
# conv2d_with_padding: input 4, kernel 3, padding 1, stride 2 gives 2, not 2.5.
summary = """
합성곱 연산:
    output[i,j] = Σ input[i+m, j+n] × kernel[m, n]

출력 크기:
    output_size = ⌊(input - kernel + 2×padding) / stride⌋ + 1

풀링:
    - MaxPool: 영역 내 최대값 선택
    - AvgPool: 영역 내 평균

다채널:
    - 각 채널에 별도 커널 적용 후 합산
    - 여러 필터 = 여러 출력 채널

학습:
    - 커널의 가중치가 학습됨
    - 역전파로 최적화

NumPy로 배운 것:
    1. 합성곱의 수학적 정의
    2. 패딩과 스트라이드의 효과
    3. 풀링의 동작 원리
    4. 다채널 처리 방식

실전에서는 PyTorch!
"""
print(summary)
print("=" * 60)