1"""
05. Understanding Convolution - NumPy Version (Educational)

Learn how the convolution operation works using NumPy.
For real CNN training, use PyTorch!

This file exists to help you understand how convolution works.
8"""
9
10import numpy as np
11import matplotlib.pyplot as plt
12
13print("=" * 60)
14print("NumPy ํฉ์ฑ๊ณฑ ์ดํด (๊ต์ก์ฉ)")
15print("=" * 60)
16
17
18# ============================================
# 1. Basic 2D convolution
20# ============================================
21print("\n[1] ๊ธฐ๋ณธ 2D ํฉ์ฑ๊ณฑ")
22print("-" * 40)
23
def conv2d_basic(image, kernel):
    """
    Slide a 2D kernel over a 2D image with no padding and a stride of 1.

    The kernel is applied as given (it is not flipped), so every output
    value is the sum of the element-wise product between the kernel and
    the image window anchored at that position.

    Args:
        image: 2D array (H, W)
        kernel: 2D array (kH, kW)

    Returns:
        Float array of shape (H-kH+1, W-kW+1)
    """
    img_rows, img_cols = image.shape
    ker_rows, ker_cols = kernel.shape
    out_shape = (img_rows - ker_rows + 1, img_cols - ker_cols + 1)

    result = np.zeros(out_shape)

    # Walk over every output coordinate in row-major order.
    for row, col in np.ndindex(*out_shape):
        # Image patch currently covered by the kernel
        window = image[row:row + ker_rows, col:col + ker_cols]
        # Element-wise product, then sum
        result[row, col] = (window * kernel).sum()

    return result
49
# Test
51image = np.array([
52 [1, 2, 3, 0],
53 [0, 1, 2, 3],
54 [3, 0, 1, 2],
55 [2, 3, 0, 1]
56], dtype=float)
57
58kernel = np.array([
59 [1, 0],
60 [0, -1]
61], dtype=float)
62
63output = conv2d_basic(image, kernel)
64print(f"์
๋ ฅ ์ด๋ฏธ์ง (4ร4):\n{image}")
65print(f"\n์ปค๋ (2ร2):\n{kernel}")
66print(f"\n์ถ๋ ฅ (3ร3):\n{output}")
67print(f"\n์์ ๊ณ์ฐ (์ข์๋จ):")
68print(f" {image[0,0]}ร{kernel[0,0]} + {image[0,1]}ร{kernel[0,1]} + {image[1,0]}ร{kernel[1,0]} + {image[1,1]}ร{kernel[1,1]}")
69print(f" = 1ร1 + 2ร0 + 0ร0 + 1ร(-1) = 0")
70
71
72# ============================================
# 2. Padding and stride
74# ============================================
75print("\n[2] ํจ๋ฉ๊ณผ ์คํธ๋ผ์ด๋")
76print("-" * 40)
77
def conv2d_with_padding(image, kernel, padding=0, stride=1):
    """
    2D convolution with zero padding and stride support.

    The kernel is applied as given (not flipped).

    Args:
        image: 2D array (H, W)
        kernel: 2D array (kH, kW)
        padding: number of zero rows/columns added on every side (>= 0)
        stride: step between neighbouring windows (>= 1)

    Returns:
        Float array of shape
        ((H + 2*padding - kH) // stride + 1, (W + 2*padding - kW) // stride + 1)

    Raises:
        ValueError: if padding is negative, stride is smaller than 1, or the
            kernel does not fit inside the (padded) image.
    """
    if padding < 0:
        raise ValueError(f"padding must be >= 0, got {padding}")
    if stride < 1:
        # stride == 0 would otherwise surface as a ZeroDivisionError below.
        raise ValueError(f"stride must be >= 1, got {stride}")

    # Apply zero padding on all four sides
    if padding > 0:
        image = np.pad(image, padding, mode='constant', constant_values=0)

    h, w = image.shape
    kh, kw = kernel.shape
    if kh > h or kw > w:
        raise ValueError(
            f"kernel {kernel.shape} does not fit inside the padded image {image.shape}"
        )

    oh = (h - kh) // stride + 1
    ow = (w - kw) // stride + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Top-left corner of the window in (padded) image coordinates
            si, sj = i * stride, j * stride
            region = image[si:si+kh, sj:sj+kw]
            output[i, j] = np.sum(region * kernel)

    return output
98
# Test
100image = np.ones((4, 4))
101kernel = np.ones((3, 3))
102
103print("์
๋ ฅ: 4ร4, ์ปค๋: 3ร3")
104for p in [0, 1]:
105 for s in [1, 2]:
106 out = conv2d_with_padding(image, kernel, padding=p, stride=s)
107 print(f" padding={p}, stride={s} โ ์ถ๋ ฅ: {out.shape}")
108
109
110# ============================================
# 3. Edge detection filters
112# ============================================
113print("\n[3] ์์ง ๊ฒ์ถ ํํฐ")
114print("-" * 40)
115
# Create a sample image
def create_sample_image():
    """Return an 8x8 float image: a 4x4 block of ones centred on zeros."""
    square = np.ones((4, 4))
    # A 2-pixel zero border on every side places the square at [2:6, 2:6].
    return np.pad(square, 2)
122
# Demo input produced by create_sample_image().
image = create_sample_image()

# Edge detection filters.
# sobel_x weights the right column positively and the left column negatively,
# so it responds to intensity changes along the horizontal direction.
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]])

# sobel_y is the transposed pattern: bottom row minus top row.
sobel_y = np.array([[-1, -2, -1],
                    [ 0,  0,  0],
                    [ 1,  2,  1]])

# Laplacian: the four direct neighbours minus four times the centre pixel.
laplacian = np.array([[0,  1, 0],
                      [1, -4, 1],
                      [0,  1, 0]])

# Apply each filter; with a 3x3 kernel, padding=1 and the default stride of 1
# the output keeps the input's height and width.
edge_x = conv2d_with_padding(image, sobel_x, padding=1)
edge_y = conv2d_with_padding(image, sobel_y, padding=1)
edge_laplace = conv2d_with_padding(image, laplacian, padding=1)

# Visualization: top row shows the input and the two Sobel kernels,
# bottom row shows the three filter responses.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes[0, 0].imshow(image, cmap='gray')
axes[0, 0].set_title('Original')
axes[0, 1].imshow(sobel_x, cmap='RdBu')
axes[0, 1].set_title('Sobel X Filter')
axes[0, 2].imshow(sobel_y, cmap='RdBu')
axes[0, 2].set_title('Sobel Y Filter')
axes[1, 0].imshow(edge_x, cmap='gray')
axes[1, 0].set_title('Sobel X Edge')
axes[1, 1].imshow(edge_y, cmap='gray')
axes[1, 1].set_title('Sobel Y Edge')
axes[1, 2].imshow(edge_laplace, cmap='gray')
axes[1, 2].set_title('Laplacian Edge')

# Hide ticks and frames on every panel.
for ax in axes.flat:
    ax.axis('off')

# Write the figure to disk, then close it.
plt.tight_layout()
plt.savefig('numpy_edge_detection.png', dpi=100)
plt.close()
164print("์์ง ๊ฒ์ถ ์ ์ฅ: numpy_edge_detection.png")
165
166
167# ============================================
# 4. Pooling operations
169# ============================================
170print("\n[4] ํ๋ง ์ฐ์ฐ")
171print("-" * 40)
172
def max_pool2d(image, pool_size=2, stride=2):
    """
    Max pooling over a 2D array.

    Args:
        image: 2D array (H, W)
        pool_size: side length of the square pooling window (>= 1)
        stride: step between neighbouring windows (>= 1)

    Returns:
        Float array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)

    Raises:
        ValueError: if pool_size or stride is smaller than 1, or the pooling
            window is larger than the image.
    """
    if pool_size < 1:
        raise ValueError(f"pool_size must be >= 1, got {pool_size}")
    if stride < 1:
        # stride == 0 would otherwise surface as a ZeroDivisionError below.
        raise ValueError(f"stride must be >= 1, got {stride}")

    h, w = image.shape
    if pool_size > h or pool_size > w:
        raise ValueError(
            f"pool_size {pool_size} does not fit inside an image of shape {image.shape}"
        )

    oh = (h - pool_size) // stride + 1
    ow = (w - pool_size) // stride + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Top-left corner of the current window
            si, sj = i * stride, j * stride
            region = image[si:si+pool_size, sj:sj+pool_size]
            output[i, j] = np.max(region)

    return output
188
def avg_pool2d(image, pool_size=2, stride=2):
    """
    Average pooling over a 2D array.

    Args:
        image: 2D array (H, W)
        pool_size: side length of the square pooling window (>= 1)
        stride: step between neighbouring windows (>= 1)

    Returns:
        Float array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)

    Raises:
        ValueError: if pool_size or stride is smaller than 1, or the pooling
            window is larger than the image.
    """
    if pool_size < 1:
        # An empty window would make np.mean return NaN instead of failing.
        raise ValueError(f"pool_size must be >= 1, got {pool_size}")
    if stride < 1:
        # stride == 0 would otherwise surface as a ZeroDivisionError below.
        raise ValueError(f"stride must be >= 1, got {stride}")

    h, w = image.shape
    if pool_size > h or pool_size > w:
        raise ValueError(
            f"pool_size {pool_size} does not fit inside an image of shape {image.shape}"
        )

    oh = (h - pool_size) // stride + 1
    ow = (w - pool_size) // stride + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Top-left corner of the current window
            si, sj = i * stride, j * stride
            region = image[si:si+pool_size, sj:sj+pool_size]
            output[i, j] = np.mean(region)

    return output
204
# Test
206image = np.array([
207 [1, 2, 3, 4],
208 [5, 6, 7, 8],
209 [9, 10, 11, 12],
210 [13, 14, 15, 16]
211], dtype=float)
212
213print(f"์
๋ ฅ:\n{image}")
214print(f"\nMax Pooling (2ร2):\n{max_pool2d(image)}")
215print(f"\nAvg Pooling (2ร2):\n{avg_pool2d(image)}")
216
217
218# ============================================
# 5. Multi-channel convolution
220# ============================================
221print("\n[5] ๋ค์ฑ๋ ํฉ์ฑ๊ณฑ")
222print("-" * 40)
223
def conv2d_multichannel(image, kernels, bias=0):
    """
    Multi-channel convolution (e.g. for an RGB image).

    Each input channel is convolved with its own kernel via conv2d_basic and
    the per-channel results are summed into a single output map.

    Args:
        image: (C, H, W) - C channels
        kernels: (C, kH, kW) - one kernel per channel
        bias: value added to every output element

    Returns:
        Output of shape (H-kH+1, W-kW+1)

    Raises:
        ValueError: if image and kernels do not have the same number of
            channels.
    """
    c, h, w = image.shape
    kc, kh, kw = kernels.shape
    if kc != c:
        # Without this check extra kernel channels would be ignored silently
        # and missing ones would only show up as an IndexError.
        raise ValueError(
            f"channel mismatch: image has {c} channels but kernels has {kc}"
        )

    output = np.zeros((h - kh + 1, w - kw + 1))

    # Convolve every channel with its kernel and accumulate the results
    for channel, channel_kernel in zip(image, kernels):
        output += conv2d_basic(channel, channel_kernel)

    return output + bias
247
# RGB image example
249rgb_image = np.random.rand(3, 8, 8) # (C, H, W)
250kernels = np.random.rand(3, 3, 3) # (C, kH, kW)
251
252output = conv2d_multichannel(rgb_image, kernels)
253print(f"์
๋ ฅ: {rgb_image.shape} (3์ฑ๋)")
254print(f"์ปค๋: {kernels.shape} (์ฑ๋๋ณ 3ร3)")
255print(f"์ถ๋ ฅ: {output.shape}")
256
257
258# ============================================
# 6. Applying multiple filters
260# ============================================
261print("\n[6] ์ฌ๋ฌ ํํฐ ์ ์ฉ")
262print("-" * 40)
263
def conv2d_layer(image, filters, biases):
    """
    Simulate a convolution layer: one output channel per filter.

    Args:
        image: (C_in, H, W)
        filters: (C_out, C_in, kH, kW)
        biases: (C_out,)

    Returns:
        Output of shape (C_out, H-kH+1, W-kW+1)
    """
    n_filters, _, ker_h, ker_w = filters.shape
    _, img_h, img_w = image.shape

    feature_maps = np.zeros((n_filters, img_h - ker_h + 1, img_w - ker_w + 1))

    # Each filter spans all input channels and yields one 2D feature map.
    for idx, filt in enumerate(filters):
        feature_maps[idx] = conv2d_multichannel(image, filt, biases[idx])

    return feature_maps
286
# Example: 3-channel input -> 8-channel output
288image = np.random.rand(3, 16, 16)
289filters = np.random.rand(8, 3, 3, 3) # 8๊ฐ ํํฐ
290biases = np.zeros(8)
291
292output = conv2d_layer(image, filters, biases)
293print(f"์
๋ ฅ: {image.shape}")
294print(f"ํํฐ: {filters.shape}")
295print(f"์ถ๋ ฅ: {output.shape}")
296
297
298# ============================================
# 7. CNN forward pass simulation
300# ============================================
301print("\n[7] CNN ์์ ํ ์๋ฎฌ๋ ์ด์
")
302print("-" * 40)
303
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    rectified = np.maximum(0, x)
    return rectified
306
def simple_cnn_forward(image):
    """
    Forward pass of a tiny CNN with freshly drawn random weights.

    Pipeline: Conv 3x3 (C_in -> 2 channels) -> ReLU -> MaxPool 2x2 ->
    Flatten -> FC (10 outputs). For the (1, 8, 8) demo input the shapes are
    (1, 8, 8) -> (2, 6, 6) -> (2, 3, 3) -> (18,) -> (10,).

    All intermediate shapes are derived from the data, so inputs other than
    (1, 8, 8) work as well.

    Args:
        image: (C_in, H, W)

    Returns:
        1D array with the 10 outputs of the fully connected layer
    """
    # Conv1: C_in -> 2 channels, 3x3 kernel
    c_in = image.shape[0]
    filters1 = np.random.randn(2, c_in, 3, 3) * 0.5
    biases1 = np.zeros(2)

    conv1_out = conv2d_layer(image, filters1, biases1)
    relu1_out = relu(conv1_out)
    print(f" Conv1 후: {relu1_out.shape}")

    # MaxPool: 2x2 window, stride 2, applied to every channel separately
    pool_out = np.stack([max_pool2d(channel, 2, 2) for channel in relu1_out])
    print(f" Pool 후: {pool_out.shape}")

    # Flatten
    flat = pool_out.flatten()
    print(f" Flatten: {flat.shape}")

    # FC: the weight matrix width follows the flattened feature count
    fc_weights = np.random.randn(10, flat.size) * 0.5
    fc_bias = np.zeros(10)
    output = fc_weights @ flat + fc_bias
    print(f" FC 출력: {output.shape}")

    return output
338
339# ํ
์คํธ
340image = np.random.rand(1, 8, 8)
341print(f"์
๋ ฅ: {image.shape}")
342output = simple_cnn_forward(image)
343
344
345# ============================================
# Why should you use PyTorch?
347# ============================================
348print("\n" + "=" * 60)
349print("NumPy CNN์ ํ๊ณ")
350print("=" * 60)
351
352limitations = """
353NumPy ๊ตฌํ์ ๋ฌธ์ ์ :
354
3551. ์๋
356 - ์์ Python ๋ฃจํ๋ ๋งค์ฐ ๋๋ฆผ
357 - 28ร28 MNIST๋ ์์ฒ ๋ฐฐ ๋๋ฆผ
358 - GPU ๊ฐ์ ๋ถ๊ฐ๋ฅ
359
3602. ์ญ์ ํ
361 - ํฉ์ฑ๊ณฑ ์ญ์ ํ ๊ตฌํ์ด ๋ณต์ก
362 - im2col ๋ฑ ์ต์ ํ ํ์
363 - ์ค์ํ๊ธฐ ์ฌ์
364
3653. ๋ฉ๋ชจ๋ฆฌ
366 - ๋นํจ์จ์ ์ธ ๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ
367 - ๋ฐฐ์น ์ฒ๋ฆฌ ์ด๋ ค์
368
3694. ๊ธฐ๋ฅ
370 - BatchNorm, Dropout ๊ตฌํ ๋ณต์ก
371 - ๋ค์ํ ์ธต/์ฐ์ฐ ๋ถ์กฑ
372
373PyTorch ์ฌ์ฉ ์ด์ :
374 โ cuDNN์ผ๋ก ์ต์ ํ๋ ํฉ์ฑ๊ณฑ
375 โ ์๋ ๋ฏธ๋ถ (์ญ์ ํ ์๋)
376 โ GPU ์ง์
377 โ ํ๋ถํ ๋ ์ด์ด/ํจ์ ์ ๊ณต
378"""
379print(limitations)
380
381
382# ============================================
# Summary
384# ============================================
385print("=" * 60)
386print("ํฉ์ฑ๊ณฑ ํต์ฌ ์ ๋ฆฌ")
387print("=" * 60)
388
389summary = """
390ํฉ์ฑ๊ณฑ ์ฐ์ฐ:
391 output[i,j] = ฮฃ input[i+m, j+n] ร kernel[m, n]
392
393์ถ๋ ฅ ํฌ๊ธฐ:
394 output_size = (input - kernel + 2รpadding) / stride + 1
395
396ํ๋ง:
397 - MaxPool: ์์ญ ๋ด ์ต๋๊ฐ ์ ํ
398 - AvgPool: ์์ญ ๋ด ํ๊ท
399
400๋ค์ฑ๋:
401 - ๊ฐ ์ฑ๋์ ๋ณ๋ ์ปค๋ ์ ์ฉ ํ ํฉ์ฐ
402 - ์ฌ๋ฌ ํํฐ = ์ฌ๋ฌ ์ถ๋ ฅ ์ฑ๋
403
404ํ์ต:
405 - ์ปค๋์ ๊ฐ์ค์น๊ฐ ํ์ต๋จ
406 - ์ญ์ ํ๋ก ์ต์ ํ
407
408NumPy๋ก ๋ฐฐ์ด ๊ฒ:
409 1. ํฉ์ฑ๊ณฑ์ ์ํ์ ์ ์
410 2. ํจ๋ฉ๊ณผ ์คํธ๋ผ์ด๋์ ํจ๊ณผ
411 3. ํ๋ง์ ๋์ ์๋ฆฌ
412 4. ๋ค์ฑ๋ ์ฒ๋ฆฌ ๋ฐฉ์
413
414์ค์ ์์๋ PyTorch!
415"""
416print(summary)
417print("=" * 60)