# conv_numpy.py
"""
Convolution operations implemented in pure NumPy.

This file implements the forward/backward passes of convolution in plain NumPy.
"""

import numpy as np
from typing import Tuple, Optional


 11def conv2d_naive(
 12    input: np.ndarray,
 13    kernel: np.ndarray,
 14    bias: Optional[np.ndarray] = None,
 15    stride: int = 1,
 16    padding: int = 0
 17) -> np.ndarray:
 18    """
 19    2D Convolution (naive implementation with loops)
 20
 21    Args:
 22        input: (N, C_in, H, W) - ๋ฐฐ์น˜ ์ž…๋ ฅ
 23        kernel: (C_out, C_in, K_h, K_w) - ํ•„ํ„ฐ
 24        bias: (C_out,) - ํŽธํ–ฅ
 25        stride: ์ŠคํŠธ๋ผ์ด๋“œ
 26        padding: ํŒจ๋”ฉ
 27
 28    Returns:
 29        output: (N, C_out, H_out, W_out)
 30    """
 31    N, C_in, H, W = input.shape
 32    C_out, _, K_h, K_w = kernel.shape
 33
 34    # ํŒจ๋”ฉ ์ ์šฉ
 35    if padding > 0:
 36        input_padded = np.pad(
 37            input,
 38            ((0, 0), (0, 0), (padding, padding), (padding, padding)),
 39            mode='constant'
 40        )
 41    else:
 42        input_padded = input
 43
 44    # ์ถœ๋ ฅ ํฌ๊ธฐ ๊ณ„์‚ฐ
 45    H_out = (H + 2 * padding - K_h) // stride + 1
 46    W_out = (W + 2 * padding - K_w) // stride + 1
 47
 48    output = np.zeros((N, C_out, H_out, W_out))
 49
 50    # Convolution ์—ฐ์‚ฐ (6์ค‘ ๋ฃจํ”„ - ๋งค์šฐ ๋А๋ฆผ)
 51    for n in range(N):                          # ๋ฐฐ์น˜
 52        for c_out in range(C_out):              # ์ถœ๋ ฅ ์ฑ„๋„
 53            for h in range(H_out):              # ์ถœ๋ ฅ ๋†’์ด
 54                for w in range(W_out):          # ์ถœ๋ ฅ ๋„ˆ๋น„
 55                    # ์ˆ˜์šฉ ์˜์—ญ
 56                    h_start = h * stride
 57                    h_end = h_start + K_h
 58                    w_start = w * stride
 59                    w_end = w_start + K_w
 60
 61                    # ์ˆ˜์šฉ ์˜์—ญ๊ณผ ์ปค๋„์˜ element-wise ๊ณฑ์˜ ํ•ฉ
 62                    receptive_field = input_padded[n, :, h_start:h_end, w_start:w_end]
 63                    output[n, c_out, h, w] = np.sum(receptive_field * kernel[c_out])
 64
 65    # ํŽธํ–ฅ ์ถ”๊ฐ€
 66    if bias is not None:
 67        output += bias.reshape(1, -1, 1, 1)
 68
 69    return output
 70
 71
 72def im2col(
 73    input: np.ndarray,
 74    kernel_size: Tuple[int, int],
 75    stride: int = 1,
 76    padding: int = 0
 77) -> np.ndarray:
 78    """
 79    im2col: ์ด๋ฏธ์ง€๋ฅผ ํ–‰๋ ฌ๋กœ ๋ณ€ํ™˜ (ํšจ์œจ์ ์ธ convolution์„ ์œ„ํ•ด)
 80
 81    Convolution์„ ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ ๋ณ€ํ™˜:
 82    - ๊ฐ ์ˆ˜์šฉ ์˜์—ญ์„ ์—ด ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜
 83    - ์ปค๋„์„ ํ–‰ ๋ฒกํ„ฐ๋กœ ๋ณ€ํ™˜
 84    - ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ convolution ์ˆ˜ํ–‰
 85
 86    Args:
 87        input: (N, C, H, W)
 88        kernel_size: (K_h, K_w)
 89        stride: ์ŠคํŠธ๋ผ์ด๋“œ
 90        padding: ํŒจ๋”ฉ
 91
 92    Returns:
 93        col: (N, C * K_h * K_w, H_out * W_out)
 94    """
 95    N, C, H, W = input.shape
 96    K_h, K_w = kernel_size
 97
 98    # ํŒจ๋”ฉ
 99    if padding > 0:
100        input_padded = np.pad(
101            input,
102            ((0, 0), (0, 0), (padding, padding), (padding, padding)),
103            mode='constant'
104        )
105    else:
106        input_padded = input
107
108    H_padded, W_padded = input_padded.shape[2], input_padded.shape[3]
109
110    # ์ถœ๋ ฅ ํฌ๊ธฐ
111    H_out = (H_padded - K_h) // stride + 1
112    W_out = (W_padded - K_w) // stride + 1
113
114    # im2col ํ–‰๋ ฌ
115    col = np.zeros((N, C, K_h, K_w, H_out, W_out))
116
117    for h in range(K_h):
118        h_max = h + stride * H_out
119        for w in range(K_w):
120            w_max = w + stride * W_out
121            col[:, :, h, w, :, :] = input_padded[:, :, h:h_max:stride, w:w_max:stride]
122
123    # (N, C, K_h, K_w, H_out, W_out) โ†’ (N, C*K_h*K_w, H_out*W_out)
124    col = col.transpose(0, 1, 2, 3, 4, 5).reshape(N, C * K_h * K_w, H_out * W_out)
125
126    return col
127
128
def col2im(
    col: np.ndarray,
    input_shape: Tuple[int, int, int, int],
    kernel_size: Tuple[int, int],
    stride: int = 1,
    padding: int = 0
) -> np.ndarray:
    """
    Inverse of im2col: scatter-add column entries back to image positions.

    Used in the backward pass to accumulate gradients flowing through
    overlapping receptive fields back onto the input image.

    Args:
        col: (N, C * K_h * K_w, H_out * W_out)
        input_shape: (N, C, H, W) shape of the original input.
        kernel_size: (K_h, K_w)
        stride: stride used in the forward im2col.
        padding: padding used in the forward im2col.

    Returns:
        (N, C, H, W) accumulated array (gradient w.r.t. the input).
    """
    batch, ch, height, width = input_shape
    k_h, k_w = kernel_size

    ph = height + 2 * padding
    pw = width + 2 * padding
    out_h = (ph - k_h) // stride + 1
    out_w = (pw - k_w) // stride + 1

    # Undo the flattening performed at the end of im2col.
    patches = col.reshape(batch, ch, k_h, k_w, out_h, out_w)

    result = np.zeros((batch, ch, ph, pw))

    # Each kernel offset (dy, dx) touches a strided grid of pixels;
    # overlapping contributions must be summed, not overwritten.
    for dy in range(k_h):
        y_stop = dy + stride * out_h
        for dx in range(k_w):
            x_stop = dx + stride * out_w
            result[:, :, dy:y_stop:stride, dx:x_stop:stride] += patches[:, :, dy, dx]

    # Strip the padding border, if any, to recover the original size.
    if padding > 0:
        return result[:, :, padding:-padding, padding:-padding]
    return result


def conv2d_im2col(
    input: np.ndarray,
    kernel: np.ndarray,
    bias: Optional[np.ndarray] = None,
    stride: int = 1,
    padding: int = 0
) -> np.ndarray:
    """
    Efficient convolution using im2col: Y = W @ col(X) + b.

    Args:
        input: (N, C_in, H, W)
        kernel: (C_out, C_in, K_h, K_w)
        bias: optional (C_out,) bias vector.
        stride: stride
        padding: zero padding on both spatial sides

    Returns:
        output: (N, C_out, H_out, W_out)
    """
    N, C_in, H, W = input.shape
    C_out, _, K_h, K_w = kernel.shape

    # Unfold patches: (N, C_in*K_h*K_w, H_out*W_out).
    col = im2col(input, (K_h, K_w), stride, padding)

    # Kernel as a matrix: (C_out, C_in*K_h*K_w); the flattening order
    # matches the (C, K_h, K_w) ordering produced by im2col.
    kernel_mat = kernel.reshape(C_out, -1)

    H_out = (H + 2 * padding - K_h) // stride + 1
    W_out = (W + 2 * padding - K_w) // stride + 1

    # np.matmul broadcasts (C_out, CK) @ (N, CK, P) -> (N, C_out, P),
    # replacing the original per-sample Python loop with one call.
    output = (kernel_mat @ col).reshape(N, C_out, H_out, W_out)

    # Broadcast the per-channel bias.
    if bias is not None:
        output += bias.reshape(1, -1, 1, 1)

    return output


class Conv2dNumpy:
    """
    Trainable NumPy convolution layer.

    Implements both the forward and backward passes via im2col/col2im.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0
    ):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Kaiming (He) initialization.
        scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
        self.weight = np.random.randn(
            out_channels, in_channels, kernel_size, kernel_size
        ) * scale
        self.bias = np.zeros(out_channels)

        # Gradients populated by backward().
        self.weight_grad = None
        self.bias_grad = None

        # Values cached during forward() for use in backward().
        self.cache = {}

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Forward pass: Y = W @ col(X) + b.

        Args:
            input: (N, C_in, H, W)

        Returns:
            (N, C_out, H_out, W_out)
        """
        N, C, H, W = input.shape

        # Unfold patches: (N, C_in*K*K, H_out*W_out).
        col = im2col(input, (self.kernel_size, self.kernel_size),
                     self.stride, self.padding)

        # Cache what backward() needs.
        self.cache['input_shape'] = input.shape
        self.cache['col'] = col

        kernel_mat = self.weight.reshape(self.out_channels, -1)

        H_out = (H + 2 * self.padding - self.kernel_size) // self.stride + 1
        W_out = (W + 2 * self.padding - self.kernel_size) // self.stride + 1

        # Batched matmul: (C_out, CK) @ (N, CK, P) -> (N, C_out, P).
        # Replaces the original per-sample Python loop with one call.
        output = (kernel_mat @ col).reshape(N, self.out_channels, H_out, W_out)
        output += self.bias.reshape(1, -1, 1, 1)

        return output

    def backward(self, grad_output: np.ndarray) -> np.ndarray:
        """
        Backward pass.

        Args:
            grad_output: dL/dY, shape (N, C_out, H_out, W_out)

        Returns:
            grad_input: dL/dX, shape (N, C_in, H, W)
        """
        N, C_out, H_out, W_out = grad_output.shape
        input_shape = self.cache['input_shape']
        col = self.cache['col']

        # dL/db: sum over batch and spatial positions.
        self.bias_grad = np.sum(grad_output, axis=(0, 2, 3))

        # Flatten spatial dims: (N, C_out, H_out*W_out).
        grad_output_mat = grad_output.reshape(N, C_out, -1)

        # dL/dW = sum_n grad[n] @ col[n]^T, done in one einsum call
        # instead of a Python loop over the batch.
        self.weight_grad = np.einsum(
            'nop,nkp->ok', grad_output_mat, col
        ).reshape(self.weight.shape)

        # dL/dcol = W^T @ dL/dY, broadcast over the batch dimension.
        kernel_mat = self.weight.reshape(self.out_channels, -1)
        grad_col = kernel_mat.T @ grad_output_mat  # (N, CK, P)

        # Scatter-add column gradients back to image positions.
        grad_input = col2im(
            grad_col, input_shape,
            (self.kernel_size, self.kernel_size),
            self.stride, self.padding
        )

        return grad_input

    def update(self, lr: float):
        """SGD step: subtract lr * gradient from each parameter."""
        self.weight -= lr * self.weight_grad
        self.bias -= lr * self.bias_grad


329# ํ…Œ์ŠคํŠธ
330if __name__ == "__main__":
331    np.random.seed(42)
332
333    # ํ…Œ์ŠคํŠธ ์ž…๋ ฅ
334    N, C_in, H, W = 2, 3, 8, 8
335    C_out, K = 4, 3
336
337    input = np.random.randn(N, C_in, H, W)
338    kernel = np.random.randn(C_out, C_in, K, K)
339    bias = np.random.randn(C_out)
340
341    # Naive vs im2col ๋น„๊ต
342    output_naive = conv2d_naive(input, kernel, bias, stride=1, padding=1)
343    output_im2col = conv2d_im2col(input, kernel, bias, stride=1, padding=1)
344
345    print("Output shape:", output_naive.shape)
346    print("Naive vs im2col ์ฐจ์ด:", np.max(np.abs(output_naive - output_im2col)))
347
348    # Conv2dNumpy ํ…Œ์ŠคํŠธ
349    conv = Conv2dNumpy(C_in, C_out, K, stride=1, padding=1)
350    output = conv.forward(input)
351    print("\nConv2dNumpy output shape:", output.shape)
352
353    # Backward ํ…Œ์ŠคํŠธ
354    grad_output = np.random.randn(*output.shape)
355    grad_input = conv.backward(grad_output)
356    print("Grad input shape:", grad_input.shape)
357    print("Weight grad shape:", conv.weight_grad.shape)
358
359    # Gradient check
360    def numerical_gradient(f, x, h=1e-5):
361        """์ˆ˜์น˜ ๋ฏธ๋ถ„์œผ๋กœ gradient ๊ฒ€์ฆ"""
362        grad = np.zeros_like(x)
363        it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
364
365        while not it.finished:
366            idx = it.multi_index
367            old_val = x[idx]
368
369            x[idx] = old_val + h
370            fxh1 = f()
371
372            x[idx] = old_val - h
373            fxh2 = f()
374
375            grad[idx] = (fxh1 - fxh2) / (2 * h)
376            x[idx] = old_val
377
378            it.iternext()
379
380        return grad
381
382    print("\n=== Gradient Check ===")
383
384    # ์ž‘์€ ์ž…๋ ฅ์œผ๋กœ gradient check
385    small_input = np.random.randn(1, 2, 4, 4)
386    small_conv = Conv2dNumpy(2, 2, 3, stride=1, padding=1)
387
388    def loss_fn():
389        out = small_conv.forward(small_input)
390        return np.sum(out ** 2)
391
392    # Analytical gradient
393    output = small_conv.forward(small_input)
394    grad_output = 2 * output  # d(sum(x^2))/dx = 2x
395    grad_input = small_conv.backward(grad_output)
396
397    # Numerical gradient (์ž…๋ ฅ์— ๋Œ€ํ•ด)
398    num_grad = numerical_gradient(loss_fn, small_input)
399
400    print("Input gradient check:")
401    print(f"  Max diff: {np.max(np.abs(grad_input - num_grad)):.2e}")
402
403    # Weight gradient check
404    def loss_fn_weight():
405        out = small_conv.forward(small_input)
406        return np.sum(out ** 2)
407
408    num_grad_weight = numerical_gradient(loss_fn_weight, small_conv.weight)
409
410    # Backward๋กœ ๊ณ„์‚ฐ
411    output = small_conv.forward(small_input)
412    small_conv.backward(2 * output)
413
414    print("Weight gradient check:")
415    print(f"  Max diff: {np.max(np.abs(small_conv.weight_grad - num_grad_weight)):.2e}")