# 02_neural_network_scratch.py
"""
02. Neural network basics - NumPy version (from scratch)

Implements an MLP forward pass using NumPy only.
Compare with the PyTorch version (examples/pytorch/02_neural_network.py).

Key point: only the forward pass is implemented here — no backpropagation.
           Backprop is implemented in 03_backprop_scratch.py.
"""

import numpy as np
import matplotlib.pyplot as plt

print("=" * 60)
print("NumPy ์‹ ๊ฒฝ๋ง ๊ธฐ์ดˆ (from scratch)")
print("=" * 60)


# ============================================
# 1. Activation functions
# ============================================
print("\n[1] ํ™œ์„ฑํ™” ํ•จ์ˆ˜ ๊ตฌํ˜„")
print("-" * 40)
 24
 25def sigmoid(x):
 26    """์‹œ๊ทธ๋ชจ์ด๋“œ: ฯƒ(x) = 1 / (1 + e^(-x))"""
 27    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
 28
 29def sigmoid_derivative(x):
 30    """์‹œ๊ทธ๋ชจ์ด๋“œ ๋ฏธ๋ถ„: ฯƒ'(x) = ฯƒ(x)(1 - ฯƒ(x))"""
 31    s = sigmoid(x)
 32    return s * (1 - s)
 33
 34def relu(x):
 35    """ReLU: max(0, x)"""
 36    return np.maximum(0, x)
 37
 38def relu_derivative(x):
 39    """ReLU ๋ฏธ๋ถ„: 1 if x > 0 else 0"""
 40    return (x > 0).astype(float)
 41
 42def tanh(x):
 43    """Tanh: (e^x - e^(-x)) / (e^x + e^(-x))"""
 44    return np.tanh(x)
 45
 46def tanh_derivative(x):
 47    """Tanh ๋ฏธ๋ถ„: 1 - tanhยฒ(x)"""
 48    return 1 - np.tanh(x)**2
 49
 50def softmax(x):
 51    """Softmax: e^xi / ฮฃe^xj"""
 52    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
 53    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
 54
# Smoke test: evaluate each activation on a few integer inputs
x_test = np.array([-2, -1, 0, 1, 2])
print(f"์ž…๋ ฅ: {x_test}")
print(f"sigmoid: {sigmoid(x_test)}")
print(f"relu: {relu(x_test)}")
print(f"tanh: {tanh(x_test)}")

# Visualization: each activation plotted together with its derivative
x = np.linspace(-5, 5, 100)

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

axes[0, 0].plot(x, sigmoid(x), label='Sigmoid')
axes[0, 0].plot(x, sigmoid_derivative(x), '--', label='Derivative')
axes[0, 0].set_title('Sigmoid and Derivative')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

axes[0, 1].plot(x, tanh(x), label='Tanh')
axes[0, 1].plot(x, tanh_derivative(x), '--', label='Derivative')
axes[0, 1].set_title('Tanh and Derivative')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

axes[1, 0].plot(x, relu(x), label='ReLU')
axes[1, 0].plot(x, relu_derivative(x), '--', label='Derivative')
axes[1, 0].set_title('ReLU and Derivative')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Softmax shown as a probability bar chart over one input vector
x_softmax = np.array([1, 2, 3, 4])
axes[1, 1].bar(range(4), softmax(x_softmax))
axes[1, 1].set_title(f'Softmax of {x_softmax}')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('numpy_activation_functions.png', dpi=100)
plt.close()
print("ํ™œ์„ฑํ™” ํ•จ์ˆ˜ ๊ทธ๋ž˜ํ”„ ์ €์žฅ: numpy_activation_functions.png")


# ============================================
# 2. Perceptron (a single neuron)
# ============================================
print("\n[2] ํผ์…‰ํŠธ๋ก  ๊ตฌํ˜„")
print("-" * 40)
102
class Perceptron:
    """A single neuron: weighted sum of inputs plus bias, squashed by a sigmoid."""

    def __init__(self, n_inputs):
        # Small random weights break symmetry; the bias starts at zero.
        self.weights = 0.1 * np.random.randn(n_inputs)
        self.bias = 0.0

    def forward(self, x):
        """Return sigmoid(w . x + b) for the input vector x."""
        pre_activation = x @ self.weights + self.bias
        return sigmoid(pre_activation)
115
# Quick check: push one 3-dimensional input through a fresh perceptron
perceptron = Perceptron(n_inputs=3)
x_input = np.array([1.0, 2.0, 3.0])
output = perceptron.forward(x_input)

print(f"์ž…๋ ฅ: {x_input}")
print(f"๊ฐ€์ค‘์น˜: {perceptron.weights}")
print(f"ํŽธํ–ฅ: {perceptron.bias}")
print(f"์ถœ๋ ฅ: {output:.4f}")


# ============================================
# 3. Multi-layer perceptron (MLP) forward pass
# ============================================
print("\n[3] MLP ์ˆœ์ „ํŒŒ ๊ตฌํ˜„")
print("-" * 40)
132
class MLPNumpy:
    """
    Multi-layer perceptron built from plain NumPy arrays.

    Forward pass only — backpropagation is covered in 03_backprop_scratch.py.
    """

    def __init__(self, layer_sizes):
        """
        layer_sizes: [input dim, hidden1, hidden2, ..., output dim]
        e.g. [784, 256, 128, 10] → 784 inputs, hidden layers 256/128, 10 outputs
        """
        self.num_layers = len(layer_sizes) - 1
        self.weights = []
        self.biases = []

        # Xavier/Glorot initialisation: std = sqrt(2 / (fan_in + fan_out)),
        # which keeps activation variance roughly constant across layers.
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
            scale = np.sqrt(2.0 / (fan_in + fan_out))
            self.weights.append(np.random.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros(fan_out))

        print(f"MLP ์ƒ์„ฑ: {layer_sizes}")
        for idx, (W, b) in enumerate(zip(self.weights, self.biases), start=1):
            print(f"  Layer {idx}: W{W.shape}, b{b.shape}")

    def forward(self, x):
        """Run the forward pass; returns (final output, list of all layer activations)."""
        activations = [x]
        current = x

        for layer in range(self.num_layers):
            z = current @ self.weights[layer] + self.biases[layer]
            # Hidden layers use ReLU; the final layer stays linear (raw logits).
            current = z if layer == self.num_layers - 1 else relu(z)
            activations.append(current)

        return activations[-1], activations

    def predict_proba(self, x):
        """Class probabilities: softmax over the network's output logits."""
        logits, _ = self.forward(x)
        return softmax(logits)

    def predict(self, x):
        """Predicted class index (argmax of the softmax probabilities)."""
        return np.argmax(self.predict_proba(x), axis=-1)
189
# MLP smoke test: a 10 → 32 → 16 → 3 network
mlp = MLPNumpy([10, 32, 16, 3])

# Batch input (4 samples, 10 features each)
x_batch = np.random.randn(4, 10)
output, activations = mlp.forward(x_batch)

print(f"\n์ž…๋ ฅ shape: {x_batch.shape}")
print(f"์ถœ๋ ฅ shape: {output.shape}")
print(f"์ถœ๋ ฅ ์˜ˆ์‹œ:\n{output}")

# Probabilities and hard class predictions for the same batch
proba = mlp.predict_proba(x_batch)
pred = mlp.predict(x_batch)
print(f"\nSoftmax ํ™•๋ฅ :\n{proba}")
print(f"์˜ˆ์ธก ํด๋ž˜์Šค: {pred}")


# ============================================
# 4. XOR problem — forward pass only
# ============================================
print("\n[4] XOR ๋ฌธ์ œ (์ˆœ์ „ํŒŒ๋งŒ)")
print("-" * 40)

# XOR truth table: output is 1 iff exactly one input is 1
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])
217
218# ์ˆ˜๋™์œผ๋กœ ๊ฐ€์ค‘์น˜ ์„ค์ • (ํ•™์Šต ์—†์ด)
219# XOR์„ ํ•ด๊ฒฐํ•˜๋Š” ์ˆ˜๋™ ์„ค์ • ๊ฐ€์ค‘์น˜
220class XORNetManual:
221    def __init__(self):
222        # ์€๋‹‰์ธต: 2๊ฐœ ๋‰ด๋Ÿฐ
223        # ์ฒซ ๋ฒˆ์งธ ๋‰ด๋Ÿฐ: AND์ฒ˜๋Ÿผ ๋™์ž‘ (๋‘˜ ๋‹ค 1์ผ ๋•Œ)
224        # ๋‘ ๋ฒˆ์งธ ๋‰ด๋Ÿฐ: OR์ฒ˜๋Ÿผ ๋™์ž‘ (ํ•˜๋‚˜๋ผ๋„ 1์ผ ๋•Œ)
225        self.W1 = np.array([[ 20,  20],   # x1์— ๋Œ€ํ•œ ๊ฐ€์ค‘์น˜
226                           [ 20,  20]])   # x2์— ๋Œ€ํ•œ ๊ฐ€์ค‘์น˜
227        self.b1 = np.array([-30, -10])    # AND: -30, OR: -10
228
229        # ์ถœ๋ ฅ์ธต: OR - AND = XOR
230        self.W2 = np.array([[-20],        # AND ๋‰ด๋Ÿฐ์— ์Œ์ˆ˜
231                           [ 20]])        # OR ๋‰ด๋Ÿฐ์— ์–‘์ˆ˜
232        self.b2 = np.array([-10])
233
234    def forward(self, x):
235        z1 = x @ self.W1 + self.b1
236        a1 = sigmoid(z1)
237
238        z2 = a1 @ self.W2 + self.b2
239        a2 = sigmoid(z2)
240
241        return a2
242
xor_manual = XORNetManual()

print("์ˆ˜๋™ ์„ค์ • ๊ฐ€์ค‘์น˜๋กœ XOR ํ•ด๊ฒฐ:")
for i in range(4):
    x = X_xor[i:i+1]  # slice keeps the 2-D shape (1, 2) for the matrix multiply
    y_pred = xor_manual.forward(x)
    print(f"  {X_xor[i]} → {y_pred[0, 0]:.4f} (์ •๋‹ต: {y_xor[i]})")
250
251
# ============================================
# 5. Visualizing the forward pass
# ============================================
print("\n[5] ์ˆœ์ „ํŒŒ ๊ณผ์ • ์‹œ๊ฐํ™”")
print("-" * 40)
257
def visualize_forward_pass(x, model):
    """Trace x through `model`, printing each layer's pre- and post-activation values."""
    print(f"์ž…๋ ฅ: {x}")

    activation = x
    last = model.num_layers - 1
    for layer_idx in range(model.num_layers):
        z = activation @ model.weights[layer_idx] + model.biases[layer_idx]
        print(f"\nLayer {layer_idx+1}:")
        print(f"  z (์„ ํ˜• ๋ณ€ํ™˜): {z[:5]}...")  # show the first 5 components only

        if layer_idx == last:
            activation = z  # output layer is linear
            print(f"  ์ถœ๋ ฅ:           {activation}")
        else:
            activation = relu(z)
            print(f"  a (ReLU ํ›„):    {activation[:5]}...")

    return activation
276
277# ๋‹จ์ผ ์ƒ˜ํ”Œ๋กœ ํ…Œ์ŠคํŠธ
278small_mlp = MLPNumpy([4, 8, 3])
279x_single = np.array([1.0, 2.0, 3.0, 4.0])
280output = visualize_forward_pass(x_single, small_mlp)
281
282
283# ============================================
284# 6. NumPy vs PyTorch ๋น„๊ต
285# ============================================
286print("\n" + "=" * 60)
287print("NumPy vs PyTorch ๋น„๊ต")
288print("=" * 60)
289
290comparison = """
291| ํ•ญ๋ชฉ          | NumPy (์ด ์ฝ”๋“œ)           | PyTorch                    |
292|---------------|---------------------------|----------------------------|
293| ์ˆœ์ „ํŒŒ        | x @ W + b ์ง์ ‘ ๊ณ„์‚ฐ       | model(x) ์ž๋™ ๊ณ„์‚ฐ         |
294| ํ™œ์„ฑํ™” ํ•จ์ˆ˜   | np.maximum(0, x)          | F.relu(x)                  |
295| ๊ฐ€์ค‘์น˜ ๊ด€๋ฆฌ   | ๋ฆฌ์ŠคํŠธ๋กœ ์ง์ ‘ ๊ด€๋ฆฌ        | model.parameters()         |
296| ์—ญ์ „ํŒŒ        | โŒ (๋‹ค์Œ ๋ ˆ์Šจ์—์„œ ๊ตฌํ˜„)   | loss.backward() ์ž๋™       |
297| ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ     | ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ ์ง์ ‘        | DataLoader ์ž๋™            |
298
299NumPy ๊ตฌํ˜„์˜ ์žฅ์ :
3001. ์ˆœ์ „ํŒŒ์˜ ์ˆ˜ํ•™์  ์›๋ฆฌ ์™„์ „ ์ดํ•ด
3012. ํ–‰๋ ฌ ์—ฐ์‚ฐ์˜ ์˜๋ฏธ ํŒŒ์•…
3023. ํ™œ์„ฑํ™” ํ•จ์ˆ˜์˜ ๋™์ž‘ ์ดํ•ด
303
304๋‹ค์Œ ๋‹จ๊ณ„ (03_backprop_scratch.py):
305- ์—ญ์ „ํŒŒ ์•Œ๊ณ ๋ฆฌ์ฆ˜ NumPy ๊ตฌํ˜„
306- ๊ฒฝ์‚ฌ ํ•˜๊ฐ•๋ฒ•์œผ๋กœ ๊ฐ€์ค‘์น˜ ์—…๋ฐ์ดํŠธ
307- XOR ๋ฌธ์ œ ํ•™์Šต์œผ๋กœ ํ•ด๊ฒฐ
308"""
309print(comparison)
310
311print("NumPy ์‹ ๊ฒฝ๋ง ๊ธฐ์ดˆ (์ˆœ์ „ํŒŒ) ์™„๋ฃŒ!")
312print("PyTorch ๋ฒ„์ „๊ณผ ๋น„๊ต: examples/pytorch/02_neural_network.py")
313print("=" * 60)