1"""
2NumPy๋ก ๊ตฌํํ Convolution ์ฐ์ฐ
3
4์ด ํ์ผ์์๋ Convolution์ forward/backward๋ฅผ ์์ NumPy๋ก ๊ตฌํํฉ๋๋ค.
5"""
6
7import numpy as np
8from typing import Tuple, Optional
9
10
11def conv2d_naive(
12 input: np.ndarray,
13 kernel: np.ndarray,
14 bias: Optional[np.ndarray] = None,
15 stride: int = 1,
16 padding: int = 0
17) -> np.ndarray:
18 """
19 2D Convolution (naive implementation with loops)
20
21 Args:
22 input: (N, C_in, H, W) - ๋ฐฐ์น ์
๋ ฅ
23 kernel: (C_out, C_in, K_h, K_w) - ํํฐ
24 bias: (C_out,) - ํธํฅ
25 stride: ์คํธ๋ผ์ด๋
26 padding: ํจ๋ฉ
27
28 Returns:
29 output: (N, C_out, H_out, W_out)
30 """
31 N, C_in, H, W = input.shape
32 C_out, _, K_h, K_w = kernel.shape
33
34 # ํจ๋ฉ ์ ์ฉ
35 if padding > 0:
36 input_padded = np.pad(
37 input,
38 ((0, 0), (0, 0), (padding, padding), (padding, padding)),
39 mode='constant'
40 )
41 else:
42 input_padded = input
43
44 # ์ถ๋ ฅ ํฌ๊ธฐ ๊ณ์ฐ
45 H_out = (H + 2 * padding - K_h) // stride + 1
46 W_out = (W + 2 * padding - K_w) // stride + 1
47
48 output = np.zeros((N, C_out, H_out, W_out))
49
50 # Convolution ์ฐ์ฐ (6์ค ๋ฃจํ - ๋งค์ฐ ๋๋ฆผ)
51 for n in range(N): # ๋ฐฐ์น
52 for c_out in range(C_out): # ์ถ๋ ฅ ์ฑ๋
53 for h in range(H_out): # ์ถ๋ ฅ ๋์ด
54 for w in range(W_out): # ์ถ๋ ฅ ๋๋น
55 # ์์ฉ ์์ญ
56 h_start = h * stride
57 h_end = h_start + K_h
58 w_start = w * stride
59 w_end = w_start + K_w
60
61 # ์์ฉ ์์ญ๊ณผ ์ปค๋์ element-wise ๊ณฑ์ ํฉ
62 receptive_field = input_padded[n, :, h_start:h_end, w_start:w_end]
63 output[n, c_out, h, w] = np.sum(receptive_field * kernel[c_out])
64
65 # ํธํฅ ์ถ๊ฐ
66 if bias is not None:
67 output += bias.reshape(1, -1, 1, 1)
68
69 return output
70
71
72def im2col(
73 input: np.ndarray,
74 kernel_size: Tuple[int, int],
75 stride: int = 1,
76 padding: int = 0
77) -> np.ndarray:
78 """
79 im2col: ์ด๋ฏธ์ง๋ฅผ ํ๋ ฌ๋ก ๋ณํ (ํจ์จ์ ์ธ convolution์ ์ํด)
80
81 Convolution์ ํ๋ ฌ ๊ณฑ์
์ผ๋ก ๋ณํ:
82 - ๊ฐ ์์ฉ ์์ญ์ ์ด ๋ฒกํฐ๋ก ๋ณํ
83 - ์ปค๋์ ํ ๋ฒกํฐ๋ก ๋ณํ
84 - ํ๋ ฌ ๊ณฑ์
์ผ๋ก convolution ์ํ
85
86 Args:
87 input: (N, C, H, W)
88 kernel_size: (K_h, K_w)
89 stride: ์คํธ๋ผ์ด๋
90 padding: ํจ๋ฉ
91
92 Returns:
93 col: (N, C * K_h * K_w, H_out * W_out)
94 """
95 N, C, H, W = input.shape
96 K_h, K_w = kernel_size
97
98 # ํจ๋ฉ
99 if padding > 0:
100 input_padded = np.pad(
101 input,
102 ((0, 0), (0, 0), (padding, padding), (padding, padding)),
103 mode='constant'
104 )
105 else:
106 input_padded = input
107
108 H_padded, W_padded = input_padded.shape[2], input_padded.shape[3]
109
110 # ์ถ๋ ฅ ํฌ๊ธฐ
111 H_out = (H_padded - K_h) // stride + 1
112 W_out = (W_padded - K_w) // stride + 1
113
114 # im2col ํ๋ ฌ
115 col = np.zeros((N, C, K_h, K_w, H_out, W_out))
116
117 for h in range(K_h):
118 h_max = h + stride * H_out
119 for w in range(K_w):
120 w_max = w + stride * W_out
121 col[:, :, h, w, :, :] = input_padded[:, :, h:h_max:stride, w:w_max:stride]
122
123 # (N, C, K_h, K_w, H_out, W_out) โ (N, C*K_h*K_w, H_out*W_out)
124 col = col.transpose(0, 1, 2, 3, 4, 5).reshape(N, C * K_h * K_w, H_out * W_out)
125
126 return col
127
128
129def col2im(
130 col: np.ndarray,
131 input_shape: Tuple[int, int, int, int],
132 kernel_size: Tuple[int, int],
133 stride: int = 1,
134 padding: int = 0
135) -> np.ndarray:
136 """
137 col2im: im2col์ ์ญ์ฐ์ฐ
138
139 Backward pass์์ gradient๋ฅผ ์๋ ์ด๋ฏธ์ง ํํ๋ก ๋ณต์
140
141 Args:
142 col: (N, C * K_h * K_w, H_out * W_out)
143 input_shape: (N, C, H, W) ์๋ณธ ์
๋ ฅ shape
144 kernel_size: (K_h, K_w)
145 stride: ์คํธ๋ผ์ด๋
146 padding: ํจ๋ฉ
147
148 Returns:
149 input_grad: (N, C, H, W)
150 """
151 N, C, H, W = input_shape
152 K_h, K_w = kernel_size
153
154 H_padded = H + 2 * padding
155 W_padded = W + 2 * padding
156 H_out = (H_padded - K_h) // stride + 1
157 W_out = (W_padded - K_w) // stride + 1
158
159 # col reshape: (N, C*K_h*K_w, H_out*W_out) โ (N, C, K_h, K_w, H_out, W_out)
160 col = col.reshape(N, C, K_h, K_w, H_out, W_out)
161
162 # ์ถ๋ ฅ ๋ฐฐ์ด (ํจ๋ฉ ํฌํจ)
163 input_padded = np.zeros((N, C, H_padded, W_padded))
164
165 # ๋์ (stride ์์น์ ๊ฐ ๋ํ๊ธฐ)
166 for h in range(K_h):
167 h_max = h + stride * H_out
168 for w in range(K_w):
169 w_max = w + stride * W_out
170 input_padded[:, :, h:h_max:stride, w:w_max:stride] += col[:, :, h, w, :, :]
171
172 # ํจ๋ฉ ์ ๊ฑฐ
173 if padding > 0:
174 return input_padded[:, :, padding:-padding, padding:-padding]
175 return input_padded
176
177
178def conv2d_im2col(
179 input: np.ndarray,
180 kernel: np.ndarray,
181 bias: Optional[np.ndarray] = None,
182 stride: int = 1,
183 padding: int = 0
184) -> np.ndarray:
185 """
186 im2col์ ์ฌ์ฉํ ํจ์จ์ ์ธ Convolution
187
188 ์ฐ์ฐ: Y = W ยท col(X) + b
189 """
190 N, C_in, H, W = input.shape
191 C_out, _, K_h, K_w = kernel.shape
192
193 # im2col ๋ณํ
194 col = im2col(input, (K_h, K_w), stride, padding) # (N, C_in*K_h*K_w, H_out*W_out)
195
196 # ์ปค๋์ ํ๋ ฌ๋ก ๋ณํ
197 kernel_mat = kernel.reshape(C_out, -1) # (C_out, C_in*K_h*K_w)
198
199 # ํ๋ ฌ ๊ณฑ์
200 H_out = (H + 2 * padding - K_h) // stride + 1
201 W_out = (W + 2 * padding - K_w) // stride + 1
202
203 # (C_out, C_in*K_h*K_w) @ (N, C_in*K_h*K_w, H_out*W_out)
204 # โ (N, C_out, H_out*W_out)
205 output = np.zeros((N, C_out, H_out * W_out))
206 for n in range(N):
207 output[n] = kernel_mat @ col[n]
208
209 # Reshape
210 output = output.reshape(N, C_out, H_out, W_out)
211
212 # ํธํฅ
213 if bias is not None:
214 output += bias.reshape(1, -1, 1, 1)
215
216 return output
217
218
219class Conv2dNumpy:
220 """
221 NumPy Convolution ๋ ์ด์ด (ํ์ต ๊ฐ๋ฅ)
222
223 forward/backward ๋ชจ๋ ๊ตฌํ
224 """
225
226 def __init__(
227 self,
228 in_channels: int,
229 out_channels: int,
230 kernel_size: int,
231 stride: int = 1,
232 padding: int = 0
233 ):
234 self.in_channels = in_channels
235 self.out_channels = out_channels
236 self.kernel_size = kernel_size
237 self.stride = stride
238 self.padding = padding
239
240 # Kaiming (He) ์ด๊ธฐํ
241 scale = np.sqrt(2.0 / (in_channels * kernel_size * kernel_size))
242 self.weight = np.random.randn(
243 out_channels, in_channels, kernel_size, kernel_size
244 ) * scale
245 self.bias = np.zeros(out_channels)
246
247 # Gradient ์ ์ฅ
248 self.weight_grad = None
249 self.bias_grad = None
250
251 # Backward๋ฅผ ์ํ ์บ์
252 self.cache = {}
253
254 def forward(self, input: np.ndarray) -> np.ndarray:
255 """Forward pass"""
256 N, C, H, W = input.shape
257
258 # im2col
259 col = im2col(input, (self.kernel_size, self.kernel_size),
260 self.stride, self.padding)
261
262 # ์บ์ ์ ์ฅ
263 self.cache['input_shape'] = input.shape
264 self.cache['col'] = col
265
266 # ํ๋ ฌ ๊ณฑ์
267 kernel_mat = self.weight.reshape(self.out_channels, -1)
268
269 H_out = (H + 2 * self.padding - self.kernel_size) // self.stride + 1
270 W_out = (W + 2 * self.padding - self.kernel_size) // self.stride + 1
271
272 output = np.zeros((N, self.out_channels, H_out * W_out))
273 for n in range(N):
274 output[n] = kernel_mat @ col[n]
275
276 output = output.reshape(N, self.out_channels, H_out, W_out)
277 output += self.bias.reshape(1, -1, 1, 1)
278
279 return output
280
281 def backward(self, grad_output: np.ndarray) -> np.ndarray:
282 """
283 Backward pass
284
285 Args:
286 grad_output: โL/โY (N, C_out, H_out, W_out)
287
288 Returns:
289 grad_input: โL/โX (N, C_in, H, W)
290 """
291 N, C_out, H_out, W_out = grad_output.shape
292 input_shape = self.cache['input_shape']
293 col = self.cache['col']
294
295 # Bias gradient: โL/โb = ฮฃ โL/โY
296 self.bias_grad = np.sum(grad_output, axis=(0, 2, 3))
297
298 # grad_output์ ํ๋ ฌ๋ก ๋ณํ
299 grad_output_mat = grad_output.reshape(N, C_out, -1) # (N, C_out, H_out*W_out)
300
301 # Weight gradient: โL/โW = โL/โY ยท col(X)^T
302 kernel_mat = self.weight.reshape(self.out_channels, -1)
303 self.weight_grad = np.zeros_like(kernel_mat)
304
305 for n in range(N):
306 self.weight_grad += grad_output_mat[n] @ col[n].T
307
308 self.weight_grad = self.weight_grad.reshape(self.weight.shape)
309
310 # Input gradient: โL/โX = col2im(W^T ยท โL/โY)
311 grad_col = np.zeros_like(col)
312 for n in range(N):
313 grad_col[n] = kernel_mat.T @ grad_output_mat[n]
314
315 grad_input = col2im(
316 grad_col, input_shape,
317 (self.kernel_size, self.kernel_size),
318 self.stride, self.padding
319 )
320
321 return grad_input
322
323 def update(self, lr: float):
324 """๊ฐ์ค์น ์
๋ฐ์ดํธ"""
325 self.weight -= lr * self.weight_grad
326 self.bias -= lr * self.bias_grad
327
328
329# ํ
์คํธ
330if __name__ == "__main__":
331 np.random.seed(42)
332
333 # ํ
์คํธ ์
๋ ฅ
334 N, C_in, H, W = 2, 3, 8, 8
335 C_out, K = 4, 3
336
337 input = np.random.randn(N, C_in, H, W)
338 kernel = np.random.randn(C_out, C_in, K, K)
339 bias = np.random.randn(C_out)
340
341 # Naive vs im2col ๋น๊ต
342 output_naive = conv2d_naive(input, kernel, bias, stride=1, padding=1)
343 output_im2col = conv2d_im2col(input, kernel, bias, stride=1, padding=1)
344
345 print("Output shape:", output_naive.shape)
346 print("Naive vs im2col ์ฐจ์ด:", np.max(np.abs(output_naive - output_im2col)))
347
348 # Conv2dNumpy ํ
์คํธ
349 conv = Conv2dNumpy(C_in, C_out, K, stride=1, padding=1)
350 output = conv.forward(input)
351 print("\nConv2dNumpy output shape:", output.shape)
352
353 # Backward ํ
์คํธ
354 grad_output = np.random.randn(*output.shape)
355 grad_input = conv.backward(grad_output)
356 print("Grad input shape:", grad_input.shape)
357 print("Weight grad shape:", conv.weight_grad.shape)
358
359 # Gradient check
360 def numerical_gradient(f, x, h=1e-5):
361 """์์น ๋ฏธ๋ถ์ผ๋ก gradient ๊ฒ์ฆ"""
362 grad = np.zeros_like(x)
363 it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
364
365 while not it.finished:
366 idx = it.multi_index
367 old_val = x[idx]
368
369 x[idx] = old_val + h
370 fxh1 = f()
371
372 x[idx] = old_val - h
373 fxh2 = f()
374
375 grad[idx] = (fxh1 - fxh2) / (2 * h)
376 x[idx] = old_val
377
378 it.iternext()
379
380 return grad
381
382 print("\n=== Gradient Check ===")
383
384 # ์์ ์
๋ ฅ์ผ๋ก gradient check
385 small_input = np.random.randn(1, 2, 4, 4)
386 small_conv = Conv2dNumpy(2, 2, 3, stride=1, padding=1)
387
388 def loss_fn():
389 out = small_conv.forward(small_input)
390 return np.sum(out ** 2)
391
392 # Analytical gradient
393 output = small_conv.forward(small_input)
394 grad_output = 2 * output # d(sum(x^2))/dx = 2x
395 grad_input = small_conv.backward(grad_output)
396
397 # Numerical gradient (์
๋ ฅ์ ๋ํด)
398 num_grad = numerical_gradient(loss_fn, small_input)
399
400 print("Input gradient check:")
401 print(f" Max diff: {np.max(np.abs(grad_input - num_grad)):.2e}")
402
403 # Weight gradient check
404 def loss_fn_weight():
405 out = small_conv.forward(small_input)
406 return np.sum(out ** 2)
407
408 num_grad_weight = numerical_gradient(loss_fn_weight, small_conv.weight)
409
410 # Backward๋ก ๊ณ์ฐ
411 output = small_conv.forward(small_input)
412 small_conv.backward(2 * output)
413
414 print("Weight gradient check:")
415 print(f" Max diff: {np.max(np.abs(small_conv.weight_grad - num_grad_weight)):.2e}")