# conv_numpy.py
"""
Convolution operations implemented in pure NumPy.

This file implements the forward/backward passes of convolution in plain NumPy.
"""

import numpy as np
from typing import Tuple, Optional


 11def conv2d_naive(
 12    input: np.ndarray,
 13    kernel: np.ndarray,
 14    bias: Optional[np.ndarray] = None,
 15    stride: int = 1,
 16    padding: int = 0
 17) -> np.ndarray:
 18    """
 19    2D Convolution (naive implementation with loops)
 20
 21    Args:
 22        input: (N, C_in, H, W) - ๋ฐฐ์น˜ ์ž…๋ ฅ
 23        kernel: (C_out, C_in, K_h, K_w) - ํ•„ํ„ฐ
 24        bias: (C_out,) - ํŽธํ–ฅ
 25        stride: ์ŠคํŠธ๋ผ์ด๋“œ
 26        padding: ํŒจ๋”ฉ
 27
 28    Returns:
 29        output: (N, C_out, H_out, W_out)
 30    """
 31    N, C_in, H, W = input.shape
 32    C_out, _, K_h, K_w = kernel.shape
 33
 34    # ํŒจ๋”ฉ ์ ์šฉ
 35    if padding > 0:
 36        input_padded = np.pad(
 37            input,
 38            ((0, 0), (0, 0), (padding, padding), (padding, padding)),
 39            mode='constant'
 40        )
 41    else:
 42        input_padded = input
 43
 44    # ์ถœ๋ ฅ ํฌ๊ธฐ ๊ณ„์‚ฐ
 45    H_out = (H + 2 * padding - K_h) // stride + 1
 46    W_out = (W + 2 * padding - K_w) // stride + 1
 47
 48    output = np.zeros((N, C_out, H_out, W_out))
 49
 50    # Convolution ์—ฐ์‚ฐ (6์ค‘ ๋ฃจํ”„ - ๋งค์šฐ ๋А๋ฆผ)
 51    for n in range(N):                          # ๋ฐฐ์น˜
 52        for c_out in range(C_out):              # ์ถœ๋ ฅ ์ฑ„๋„
 53            for h in range(H_out):              # ์ถœ๋ ฅ ๋†’์ด
 54                for w in range(W_out):          # ์ถœ๋ ฅ ๋„ˆ๋น„
 55                    # ์ˆ˜์šฉ ์˜์—ญ
 56                    h_start = h * stride
 57                    h_end = h_start + K_h
 58                    w_start = w * stride
 59                    w_end = w_start + K_w
 60
 61                    # ์ˆ˜์šฉ ์˜์—ญ๊ณผ ์ปค๋„์˜ element-wise ๊ณฑ์˜ ํ•ฉ
 62                    receptive_field = input_padded[n, :, h_start:h_end, w_start:w_end]
 63                    output[n, c_out, h, w] = np.sum(receptive_field * kernel[c_out])
 64
 65    # ํŽธํ–ฅ ์ถ”๊ฐ€
 66    if bias is not None:
 67        output += bias.reshape(1, -1, 1, 1)
 68
 69    return output
 70
 71
 72def im2col(
 73    input: np.ndarray,
 74    kernel_size: Tuple[int, int],
 75    stride: int = 1,
 76    padding: int = 0
 77) -> np.ndarray:
 78    """
 79    im2col: ์ด๋ฏธ์ง€๋ฅผ ํ–‰๋ ฌ๋กœ ๋ณ€ํ™˜ (ํšจ์œจ์ ์ธ convolution์„ ์œ„ํ•ด)
 80
 81    Convolution์„ ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ ๋ณ€ํ™˜:
 82    - ๊ฐ ์ˆ˜์šฉ ์˜์—ญ์„ ์—ด ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜
 83    - ์ปค๋„์„ ํ–‰ ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜
 84    - ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ convolution ์ˆ˜ํ–‰
 85
 86    Args:
 87        input: (N, C, H, W)
 88        kernel_size: (K_h, K_w)
 89        stride: ์ŠคํŠธ๋ผ์ด๋“œ
 90        padding: ํŒจ๋”ฉ
 91
 92    Returns:
 93        col: (N, C * K_h * K_w, H_out * W_out)
 94    """
 95    N, C, H, W = input.shape
 96    K_h, K_w = kernel_size
 97
 98    # ํŒจ๋”ฉ
 99    if padding > 0:
100        input_padded = np.pad(
101            input,
102            ((0, 0), (0, 0), (padding, padding), (padding, padding)),
103            mode='constant'
104        )
105    else:
106        input_padded = input
107
108    H_padded, W_padded = input_padded.shape[2], input_padded.shape[3]
109
110    # ์ถœ๋ ฅ ํฌ๊ธฐ
111    H_out = (H_padded - K_h) // stride + 1
112    W_out = (W_padded - K_w) // stride + 1
113
114    # im2col ํ–‰๋ ฌ
115    col = np.zeros((N, C, K_h, K_w, H_out, W_out))
116
117    for h in range(K_h):
118        h_max = h + stride * H_out
119        for w in range(K_w):
120            w_max = w + stride * W_out
121            col[:, :, h, w, :, :] = input_padded[:, :, h:h_max:stride, w:w_max:stride]
122
123    # (N, C, K_h, K_w, H_out, W_out) โ†’ (N, C*K_h*K_w, H_out*W_out)
124    col = col.transpose(0, 1, 2, 3, 4, 5).reshape(N, C * K_h * K_w, H_out * W_out)
125
126    return col
127
128
def col2im(
    col: np.ndarray,
    input_shape: Tuple[int, int, int, int],
    kernel_size: Tuple[int, int],
    stride: int = 1,
    padding: int = 0
) -> np.ndarray:
    """
    Inverse of im2col: scatter-add column entries back to image positions.

    Used in the backward pass to accumulate gradients flowing through
    overlapping receptive fields back onto the input image.

    Args:
        col: (N, C * K_h * K_w, H_out * W_out)
        input_shape: (N, C, H, W) shape of the original input.
        kernel_size: (K_h, K_w)
        stride: stride used in the forward im2col.
        padding: padding used in the forward im2col.

    Returns:
        (N, C, H, W) accumulated array (gradient w.r.t. the input).
    """
    batch, ch, height, width = input_shape
    k_h, k_w = kernel_size

    ph = height + 2 * padding
    pw = width + 2 * padding
    out_h = (ph - k_h) // stride + 1
    out_w = (pw - k_w) // stride + 1

    # Undo the flattening performed at the end of im2col.
    patches = col.reshape(batch, ch, k_h, k_w, out_h, out_w)

    result = np.zeros((batch, ch, ph, pw))

    # Each kernel offset (dy, dx) touches a strided grid of pixels;
    # overlapping contributions must be summed, not overwritten.
    for dy in range(k_h):
        y_stop = dy + stride * out_h
        for dx in range(k_w):
            x_stop = dx + stride * out_w
            result[:, :, dy:y_stop:stride, dx:x_stop:stride] += patches[:, :, dy, dx]

    # Strip the padding border, if any, to recover the original size.
    if padding > 0:
        return result[:, :, padding:-padding, padding:-padding]
    return result


def conv2d_im2col(
    input: np.ndarray,
    kernel: np.ndarray,
    bias: Optional[np.ndarray] = None,
    stride: int = 1,
    padding: int = 0
) -> np.ndarray:
    """
    Efficient convolution using im2col: Y = W @ col(X) + b.

    Args:
        input: (N, C_in, H, W)
        kernel: (C_out, C_in, K_h, K_w)
        bias: optional (C_out,) bias vector.
        stride: stride
        padding: zero padding on both spatial sides

    Returns:
        output: (N, C_out, H_out, W_out)
    """
    N, C_in, H, W = input.shape
    C_out, _, K_h, K_w = kernel.shape

    # Unfold patches: (N, C_in*K_h*K_w, H_out*W_out).
    col = im2col(input, (K_h, K_w), stride, padding)

    # Kernel as a matrix: (C_out, C_in*K_h*K_w); the flattening order
    # matches the (C, K_h, K_w) ordering produced by im2col.
    kernel_mat = kernel.reshape(C_out, -1)

    H_out = (H + 2 * padding - K_h) // stride + 1
    W_out = (W + 2 * padding - K_w) // stride + 1

    # np.matmul broadcasts (C_out, CK) @ (N, CK, P) -> (N, C_out, P),
    # replacing the original per-sample Python loop with one call.
    output = (kernel_mat @ col).reshape(N, C_out, H_out, W_out)

    # Broadcast the per-channel bias.
    if bias is not None:
        output += bias.reshape(1, -1, 1, 1)

    return output


class Conv2dNumpy:
    """
    Trainable NumPy convolution layer.

    Implements both the forward and backward passes via im2col/col2im.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0
    ):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Kaiming (He) initialization.
        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.weight = np.random.randn(
            out_channels, in_channels, kernel_size, kernel_size
        ) * scale
        self.bias = np.zeros(out_channels)

        # Gradients populated by backward().
        self.weight_grad = None
        self.bias_grad = None

        # Values cached during forward() for use in backward().
        self.cache = {}

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Forward pass: Y = W @ col(X) + b.

        Args:
            input: (N, C_in, H, W)

        Returns:
            (N, C_out, H_out, W_out)
        """
        N, C, H, W = input.shape

        # Unfold patches: (N, C_in*K*K, H_out*W_out).
        col = im2col(input, (self.kernel_size, self.kernel_size),
                     self.stride, self.padding)

        # Cache what backward() needs.
        self.cache['input_shape'] = input.shape
        self.cache['col'] = col

        kernel_mat = self.weight.reshape(self.out_channels, -1)

        H_out = (H + 2 * self.padding - self.kernel_size) // self.stride + 1
        W_out = (W + 2 * self.padding - self.kernel_size) // self.stride + 1

        # Batched matmul: (C_out, CK) @ (N, CK, P) -> (N, C_out, P).
        # Replaces the original per-sample Python loop with one call.
        output = (kernel_mat @ col).reshape(N, self.out_channels, H_out, W_out)
        output += self.bias.reshape(1, -1, 1, 1)

        return output

    def backward(self, grad_output: np.ndarray) -> np.ndarray:
        """
        Backward pass.

        Args:
            grad_output: dL/dY, shape (N, C_out, H_out, W_out)

        Returns:
            grad_input: dL/dX, shape (N, C_in, H, W)
        """
        N, C_out, H_out, W_out = grad_output.shape
        input_shape = self.cache['input_shape']
        col = self.cache['col']

        # dL/db: sum over batch and spatial positions.
        self.bias_grad = np.sum(grad_output, axis=(0, 2, 3))

        # Flatten spatial dims: (N, C_out, H_out*W_out).
        grad_output_mat = grad_output.reshape(N, C_out, -1)

        # dL/dW = sum_n grad[n] @ col[n]^T, done in one einsum call
        # instead of a Python loop over the batch.
        self.weight_grad = np.einsum(
            'nop,nkp->ok', grad_output_mat, col
        ).reshape(self.weight.shape)

        # dL/dcol = W^T @ dL/dY, broadcast over the batch dimension.
        kernel_mat = self.weight.reshape(self.out_channels, -1)
        grad_col = kernel_mat.T @ grad_output_mat  # (N, CK, P)

        # Scatter-add column gradients back to image positions.
        grad_input = col2im(
            grad_col, input_shape,
            (self.kernel_size, self.kernel_size),
            self.stride, self.padding
        )

        return grad_input

    def update(self, lr: float):
        """SGD step: subtract lr * gradient from each parameter."""
        self.weight -= lr * self.weight_grad
        self.bias -= lr * self.bias_grad


329# ํ…Œ์ŠคํŠธ
330if __name__ == "__main__":
331    np.random.seed(42)
332
333    # ํ…Œ์ŠคํŠธ ์ž…๋ ฅ
334    N, C_in, H, W = 2, 3, 8, 8
335    C_out, K = 4, 3
336
337    input = np.random.randn(N, C_in, H, W)
338    kernel = np.random.randn(C_out, C_in, K, K)
339    bias = np.random.randn(C_out)
340
341    # Naive vs im2col ๋น„๊ต
342    output_naive = conv2d_naive(input, kernel, bias, stride=1, padding=1)
343    output_im2col = conv2d_im2col(input, kernel, bias, stride=1, padding=1)
344
345    print("Output shape:", output_naive.shape)
346    print("Naive vs im2col ์ฐจ์ด:", np.max(np.abs(output_naive - output_im2col)))
347
348    # Conv2dNumpy ํ…Œ์ŠคํŠธ
349    conv = Conv2dNumpy(C_in, C_out, K, stride=1, padding=1)
350    output = conv.forward(input)
351    print("\nConv2dNumpy output shape:", output.shape)
352
353    # Backward ํ…Œ์ŠคํŠธ
354    grad_output = np.random.randn(*output.shape)
355    grad_input = conv.backward(grad_output)
356    print("Grad input shape:", grad_input.shape)
357    print("Weight grad shape:", conv.weight_grad.shape)
358
359    # Gradient check
360    def numerical_gradient(f, x, h=1e-5):
361        """์ˆ˜์น˜ ๋ฏธ๋ถ„์œผ๋กœ gradient ๊ฒ€์ฆ"""
362        grad = np.zeros_like(x)
363        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
364
365        while not it.finished:
366            idx = it.multi_index
367            old_val = x[idx]
368
369            x[idx] = old_val + h
370            fxh1 = f()
371
372            x[idx] = old_val - h
373            fxh2 = f()
374
375            grad[idx] = (fxh1 - fxh2) / (2 * h)
376            x[idx] = old_val
377
378            it.iternext()
379
380        return grad
381
382    print("\n=== Gradient Check ===")
383
384    # ์ž‘์€ ์ž…๋ ฅ์œผ๋กœ gradient check
385    small_input = np.random.randn(1, 2, 4, 4)
386    small_conv = Conv2dNumpy(2, 2, 3, stride=1, padding=1)
387
388    def loss_fn():
389        out = small_conv.forward(small_input)
390        return np.sum(out ** 2)
391
392    # Analytical gradient
393    output = small_conv.forward(small_input)
394    grad_output = 2 * output  # d(sum(x^2))/dx = 2x
395    grad_input = small_conv.backward(grad_output)
396
397    # Numerical gradient (์ž…๋ ฅ์— ๋Œ€ํ•ด)
398    num_grad = numerical_gradient(loss_fn, small_input)
399
400    print("Input gradient check:")
401    print(f"  Max diff: {np.max(np.abs(grad_input - num_grad)):.2e}")
402
403    # Weight gradient check
404    def loss_fn_weight():
405        out = small_conv.forward(small_input)
406        return np.sum(out ** 2)
407
408    num_grad_weight = numerical_gradient(loss_fn_weight, small_conv.weight)
409
410    # Backward๋กœ ๊ณ„์‚ฐ
411    output = small_conv.forward(small_input)
412    small_conv.backward(2 * output)
413
414    print("Weight gradient check:")
415    print(f"  Max diff: {np.max(np.abs(small_conv.weight_grad - num_grad_weight)):.2e}")