# 05_conv_understanding.py (Python, 418 lines, 10.2 KB)
"""
05. Understanding Convolution - NumPy Version (Educational)

Explains how the convolution operation works using NumPy.
Use PyTorch for actual CNN training!

This file exists to help you understand how convolution behaves.
"""

import matplotlib.pyplot as plt
import numpy as np

 13print("=" * 60)
 14print("NumPy ํ•ฉ์„ฑ๊ณฑ ์ดํ•ด (๊ต์œก์šฉ)")
 15print("=" * 60)
 16
 17
 18# ============================================
 19# 1. ๊ธฐ๋ณธ 2D ํ•ฉ์„ฑ๊ณฑ
 20# ============================================
 21print("\n[1] ๊ธฐ๋ณธ 2D ํ•ฉ์„ฑ๊ณฑ")
 22print("-" * 40)
 23
 24def conv2d_basic(image, kernel):
 25    """
 26    ๊ฐ€์žฅ ๊ธฐ๋ณธ์ ์ธ 2D ํ•ฉ์„ฑ๊ณฑ ๊ตฌํ˜„
 27
 28    Args:
 29        image: 2D ๋ฐฐ์—ด (H, W)
 30        kernel: 2D ๋ฐฐ์—ด (kH, kW)
 31
 32    Returns:
 33        ์ถœ๋ ฅ (H-kH+1, W-kW+1)
 34    """
 35    h, w = image.shape
 36    kh, kw = kernel.shape
 37    oh, ow = h - kh + 1, w - kw + 1
 38
 39    output = np.zeros((oh, ow))
 40
 41    for i in range(oh):
 42        for j in range(ow):
 43            # ์˜์—ญ ์ถ”์ถœ
 44            region = image[i:i+kh, j:j+kw]
 45            # ์š”์†Œ๋ณ„ ๊ณฑ์…ˆ ํ›„ ํ•ฉ์‚ฐ
 46            output[i, j] = np.sum(region * kernel)
 47
 48    return output
 49
 50# ํ…Œ์ŠคํŠธ
 51image = np.array([
 52    [1, 2, 3, 0],
 53    [0, 1, 2, 3],
 54    [3, 0, 1, 2],
 55    [2, 3, 0, 1]
 56], dtype=float)
 57
 58kernel = np.array([
 59    [1, 0],
 60    [0, -1]
 61], dtype=float)
 62
 63output = conv2d_basic(image, kernel)
 64print(f"์ž…๋ ฅ ์ด๋ฏธ์ง€ (4ร—4):\n{image}")
 65print(f"\n์ปค๋„ (2ร—2):\n{kernel}")
 66print(f"\n์ถœ๋ ฅ (3ร—3):\n{output}")
 67print(f"\n์˜ˆ์‹œ ๊ณ„์‚ฐ (์ขŒ์ƒ๋‹จ):")
 68print(f"  {image[0,0]}ร—{kernel[0,0]} + {image[0,1]}ร—{kernel[0,1]} + {image[1,0]}ร—{kernel[1,0]} + {image[1,1]}ร—{kernel[1,1]}")
 69print(f"  = 1ร—1 + 2ร—0 + 0ร—0 + 1ร—(-1) = 0")
 70
 71
 72# ============================================
 73# 2. ํŒจ๋”ฉ๊ณผ ์ŠคํŠธ๋ผ์ด๋“œ
 74# ============================================
 75print("\n[2] ํŒจ๋”ฉ๊ณผ ์ŠคํŠธ๋ผ์ด๋“œ")
 76print("-" * 40)
 77
 78def conv2d_with_padding(image, kernel, padding=0, stride=1):
 79    """ํŒจ๋”ฉ๊ณผ ์ŠคํŠธ๋ผ์ด๋“œ๋ฅผ ์ง€์›ํ•˜๋Š” ํ•ฉ์„ฑ๊ณฑ"""
 80    # ํŒจ๋”ฉ ์ ์šฉ
 81    if padding > 0:
 82        image = np.pad(image, padding, mode='constant', constant_values=0)
 83
 84    h, w = image.shape
 85    kh, kw = kernel.shape
 86    oh = (h - kh) // stride + 1
 87    ow = (w - kw) // stride + 1
 88
 89    output = np.zeros((oh, ow))
 90
 91    for i in range(oh):
 92        for j in range(ow):
 93            si, sj = i * stride, j * stride
 94            region = image[si:si+kh, sj:sj+kw]
 95            output[i, j] = np.sum(region * kernel)
 96
 97    return output
 98
 99# ํ…Œ์ŠคํŠธ
100image = np.ones((4, 4))
101kernel = np.ones((3, 3))
102
103print("์ž…๋ ฅ: 4ร—4, ์ปค๋„: 3ร—3")
104for p in [0, 1]:
105    for s in [1, 2]:
106        out = conv2d_with_padding(image, kernel, padding=p, stride=s)
107        print(f"  padding={p}, stride={s} โ†’ ์ถœ๋ ฅ: {out.shape}")
108
109
110# ============================================
111# 3. ์—์ง€ ๊ฒ€์ถœ ํ•„ํ„ฐ
112# ============================================
113print("\n[3] ์—์ง€ ๊ฒ€์ถœ ํ•„ํ„ฐ")
114print("-" * 40)
115
116# ์ƒ˜ํ”Œ ์ด๋ฏธ์ง€ ์ƒ์„ฑ
def create_sample_image():
    """Return an 8x8 float image holding a 4x4 square of ones centered in zeros."""
    square = np.ones((4, 4))
    # A 2-pixel zero border on every side places the square at rows/cols 2..5.
    return np.pad(square, 2, mode='constant', constant_values=0)

image = create_sample_image()

# Edge detection filters
sobel_x = np.array([[-1, 0, 1],
                    [-2, 0, 2],
                    [-1, 0, 1]])

sobel_y = np.array([[-1, -2, -1],
                    [ 0,  0,  0],
                    [ 1,  2,  1]])

laplacian = np.array([[0,  1, 0],
                      [1, -4, 1],
                      [0,  1, 0]])

# Apply the filters (padding=1 keeps the 8x8 size for 3x3 kernels)
edge_x = conv2d_with_padding(image, sobel_x, padding=1)
edge_y = conv2d_with_padding(image, sobel_y, padding=1)
edge_laplace = conv2d_with_padding(image, laplacian, padding=1)

# Visualization: top row shows the input and the kernels, bottom row the responses
fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes[0, 0].imshow(image, cmap='gray')
axes[0, 0].set_title('Original')
axes[0, 1].imshow(sobel_x, cmap='RdBu')
axes[0, 1].set_title('Sobel X Filter')
axes[0, 2].imshow(sobel_y, cmap='RdBu')
axes[0, 2].set_title('Sobel Y Filter')
axes[1, 0].imshow(edge_x, cmap='gray')
axes[1, 0].set_title('Sobel X Edge')
axes[1, 1].imshow(edge_y, cmap='gray')
axes[1, 1].set_title('Sobel Y Edge')
axes[1, 2].imshow(edge_laplace, cmap='gray')
axes[1, 2].set_title('Laplacian Edge')

for ax in axes.flat:
    ax.axis('off')

plt.tight_layout()
plt.savefig('numpy_edge_detection.png', dpi=100)
# Close this specific figure so it does not stay alive in pyplot's registry.
plt.close(fig)
print("에지 검출 저장: numpy_edge_detection.png")


# ============================================
# 4. Pooling operations
# ============================================
print("\n[4] 풀링 연산")
print("-" * 40)

def max_pool2d(image, pool_size=2, stride=2):
    """
    Max pooling over a 2D array.

    Args:
        image: 2D array of shape (H, W)
        pool_size: side length of the square pooling window (>= 1)
        stride: step between consecutive windows (>= 1)

    Returns:
        Float array of shape ((H - pool_size) // stride + 1,
                              (W - pool_size) // stride + 1)

    Raises:
        ValueError: if pool_size or stride is smaller than 1
    """
    # Non-positive values previously gave a ZeroDivisionError or silently
    # wrong output; fail early with a clear message instead.
    if pool_size < 1:
        raise ValueError(f"pool_size must be >= 1, got {pool_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    h, w = image.shape
    oh = (h - pool_size) // stride + 1
    ow = (w - pool_size) // stride + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Top-left corner of the current pooling window.
            si, sj = i * stride, j * stride
            region = image[si:si+pool_size, sj:sj+pool_size]
            output[i, j] = np.max(region)

    return output

def avg_pool2d(image, pool_size=2, stride=2):
    """
    Average pooling over a 2D array.

    Args:
        image: 2D array of shape (H, W)
        pool_size: side length of the square pooling window (>= 1)
        stride: step between consecutive windows (>= 1)

    Returns:
        Float array of shape ((H - pool_size) // stride + 1,
                              (W - pool_size) // stride + 1)

    Raises:
        ValueError: if pool_size or stride is smaller than 1
    """
    # pool_size=0 previously averaged empty windows and returned NaN;
    # non-positive stride gave a ZeroDivisionError or wrong output.
    if pool_size < 1:
        raise ValueError(f"pool_size must be >= 1, got {pool_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    h, w = image.shape
    oh = (h - pool_size) // stride + 1
    ow = (w - pool_size) // stride + 1

    output = np.zeros((oh, ow))

    for i in range(oh):
        for j in range(ow):
            # Top-left corner of the current pooling window.
            si, sj = i * stride, j * stride
            region = image[si:si+pool_size, sj:sj+pool_size]
            output[i, j] = np.mean(region)

    return output

# Test
image = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
    [13, 14, 15, 16]
], dtype=float)

print(f"입력:\n{image}")
print(f"\nMax Pooling (2×2):\n{max_pool2d(image)}")
print(f"\nAvg Pooling (2×2):\n{avg_pool2d(image)}")


# ============================================
# 5. Multi-channel convolution
# ============================================
print("\n[5] 다채널 합성곱")
print("-" * 40)

def conv2d_multichannel(image, kernels, bias=0):
    """
    Multi-channel convolution (e.g. an RGB image) producing one output map.

    Each input channel is convolved with its own kernel via conv2d_basic
    and the per-channel results are summed, then the bias is added.

    Args:
        image: (C, H, W) - C input channels
        kernels: (C, kH, kW) - one kernel per input channel
        bias: value added to every output element

    Returns:
        Array of shape (H-kH+1, W-kW+1)

    Raises:
        ValueError: if image and kernels do not have the same channel count
    """
    c, h, w = image.shape
    kc, kh, kw = kernels.shape
    # Previously the kernel channel count was discarded, so surplus kernel
    # channels were silently ignored.
    if kc != c:
        raise ValueError(
            f"channel mismatch: image has {c} channels, kernels has {kc}"
        )
    oh, ow = h - kh + 1, w - kw + 1

    output = np.zeros((oh, ow))

    # Convolve each channel separately and accumulate the results.
    for ch in range(c):
        output += conv2d_basic(image[ch], kernels[ch])

    return output + bias

# RGB image example
rgb_image = np.random.rand(3, 8, 8)  # (C, H, W)
kernels = np.random.rand(3, 3, 3)    # (C, kH, kW)

output = conv2d_multichannel(rgb_image, kernels)
print(f"입력: {rgb_image.shape} (3채널)")
print(f"커널: {kernels.shape} (채널별 3×3)")
print(f"출력: {output.shape}")


# ============================================
# 6. Applying multiple filters
# ============================================
print("\n[6] 여러 필터 적용")
print("-" * 40)

def conv2d_layer(image, filters, biases):
    """
    Simulate a convolution layer: apply several multi-channel filters.

    Every filter is run through conv2d_multichannel together with its own
    bias, and the resulting maps are stacked along the first axis.

    Args:
        image: (C_in, H, W)
        filters: (C_out, C_in, kH, kW)
        biases: (C_out,)

    Returns:
        Array of shape (C_out, H-kH+1, W-kW+1)
    """
    n_filters, _, k_rows, k_cols = filters.shape
    _, img_rows, img_cols = image.shape

    feature_maps = np.zeros(
        (n_filters, img_rows - k_rows + 1, img_cols - k_cols + 1)
    )

    # One output channel per filter.
    for idx in range(n_filters):
        feature_maps[idx] = conv2d_multichannel(image, filters[idx], biases[idx])

    return feature_maps

# Example: 3-channel input → 8-channel output
image = np.random.rand(3, 16, 16)
filters = np.random.rand(8, 3, 3, 3)  # 8 filters
biases = np.zeros(8)

output = conv2d_layer(image, filters, biases)
print(f"입력: {image.shape}")
print(f"필터: {filters.shape}")
print(f"출력: {output.shape}")


# ============================================
# 7. CNN forward-pass simulation
# ============================================
print("\n[7] CNN 순전파 시뮬레이션")
print("-" * 40)

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and x."""
    floor = 0
    return np.maximum(floor, x)

def simple_cnn_forward(image):
    """
    Forward pass of a tiny CNN using freshly sampled random weights.

    Pipeline: Conv (1 -> 2 channels, 3x3) -> ReLU -> MaxPool 2x2 -> Flatten
    -> FC (10 outputs). For a (1, 8, 8) input the shapes are
    (1, 8, 8) -> (2, 6, 6) -> (2, 3, 3) -> 18 -> 10.

    Weights are drawn from np.random on every call, so the result changes
    between calls unless the global NumPy seed is fixed. The shape after
    each stage is printed.

    Args:
        image: (1, H, W) single-channel input

    Returns:
        1D array with 10 values
    """
    # Conv1: 1 -> 2 channels, 3x3 kernels
    filters1 = np.random.randn(2, 1, 3, 3) * 0.5
    biases1 = np.zeros(2)

    conv1_out = conv2d_layer(image, filters1, biases1)
    relu1_out = relu(conv1_out)
    print(f"  Conv1 후: {relu1_out.shape}")

    # MaxPool: 2x2. The pooled shape is taken from the pooling result itself
    # instead of a hard-coded (2, 3, 3), so inputs other than 8x8 also work.
    pool_out = np.stack([max_pool2d(channel, 2, 2) for channel in relu1_out])
    print(f"  Pool 후: {pool_out.shape}")

    # Flatten
    flat = pool_out.flatten()
    print(f"  Flatten: {flat.shape}")

    # FC: the weight matrix width follows the flattened size
    # (18 for an 8x8 input) rather than being fixed.
    fc_weights = np.random.randn(10, flat.size) * 0.5
    fc_bias = np.zeros(10)
    output = fc_weights @ flat + fc_bias
    print(f"  FC 출력: {output.shape}")

    return output

# Test
image = np.random.rand(1, 8, 8)
print(f"입력: {image.shape}")
output = simple_cnn_forward(image)


# ============================================
# Why should you use PyTorch?
# ============================================
print("\n" + "=" * 60)
print("NumPy CNN의 한계")
print("=" * 60)

limitations = """
NumPy 구현의 문제점:

1. 속도
   - 순수 Python 루프는 매우 느림
   - 28×28 MNIST도 수천 배 느림
   - GPU 가속 불가능

2. 역전파
   - 합성곱 역전파 구현이 복잡
   - im2col 등 최적화 필요
   - 실수하기 쉬움

3. 메모리
   - 비효율적인 메모리 사용
   - 배치 처리 어려움

4. 기능
   - BatchNorm, Dropout 구현 복잡
   - 다양한 층/연산 부족

PyTorch 사용 이유:
   ✓ cuDNN으로 최적화된 합성곱
   ✓ 자동 미분 (역전파 자동)
   ✓ GPU 지원
   ✓ 풍부한 레이어/함수 제공
"""
print(limitations)


# ============================================
# Summary
# ============================================
print("=" * 60)
print("합성곱 핵심 정리")
print("=" * 60)

summary = """
합성곱 연산:
    output[i,j] = Σ input[i+m, j+n] × kernel[m, n]

출력 크기:
    output_size = (input - kernel + 2×padding) / stride + 1

풀링:
    - MaxPool: 영역 내 최대값 선택
    - AvgPool: 영역 내 평균

다채널:
    - 각 채널에 별도 커널 적용 후 합산
    - 여러 필터 = 여러 출력 채널

학습:
    - 커널의 가중치가 학습됨
    - 역전파로 최적화

NumPy로 배운 것:
    1. 합성곱의 수학적 정의
    2. 패딩과 스트라이드의 효과
    3. 풀링의 동작 원리
    4. 다채널 처리 방식

실전에서는 PyTorch!
"""
print(summary)
print("=" * 60)