# 02_neural_network_scratch.py
"""
02. Neural network basics - NumPy version (from scratch)

Implements an MLP forward pass using NumPy only.
Compare with the PyTorch version (examples/pytorch/02_neural_network.py).

Key point: only the forward pass is implemented here — no backpropagation.
           Backprop is implemented in 03_backprop_scratch.py.
"""

import numpy as np
import matplotlib.pyplot as plt

print("=" * 60)
print("NumPy ์‹ ๊ฒฝ๋ง ๊ธฐ์ดˆ (from scratch)")
print("=" * 60)


# ============================================
# 1. Activation functions
# ============================================
print("\n[1] ํ™œ์„ฑํ™” ํ•จ์ˆ˜ ๊ตฌํ˜„")
print("-" * 40)
 24
 25def sigmoid(x):
 26    """์‹œ๊ทธ๋ชจ์ด๋“œ: ฯƒ(x) = 1 / (1 + e^(-x))"""
 27    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
 28
 29def sigmoid_derivative(x):
 30    """์‹œ๊ทธ๋ชจ์ด๋“œ ๋ฏธ๋ถ„: ฯƒ'(x) = ฯƒ(x)(1 - ฯƒ(x))"""
 31    s = sigmoid(x)
 32    return s * (1 - s)
 33
 34def relu(x):
 35    """ReLU: max(0, x)"""
 36    return np.maximum(0, x)
 37
 38def relu_derivative(x):
 39    """ReLU ๋ฏธ๋ถ„: 1 if x > 0 else 0"""
 40    return (x > 0).astype(float)
 41
 42def tanh(x):
 43    """Tanh: (e^x - e^(-x)) / (e^x + e^(-x))"""
 44    return np.tanh(x)
 45
 46def tanh_derivative(x):
 47    """Tanh ๋ฏธ๋ถ„: 1 - tanhยฒ(x)"""
 48    return 1 - np.tanh(x)**2
 49
 50def softmax(x):
 51    """Softmax: e^xi / ฮฃe^xj"""
 52    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
 53    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
 54
# Smoke test: evaluate each activation on a few integer inputs
x_test = np.array([-2, -1, 0, 1, 2])
print(f"์ž…๋ ฅ: {x_test}")
print(f"sigmoid: {sigmoid(x_test)}")
print(f"relu: {relu(x_test)}")
print(f"tanh: {tanh(x_test)}")

# Visualization: each activation plotted together with its derivative
x = np.linspace(-5, 5, 100)

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

axes[0, 0].plot(x, sigmoid(x), label='Sigmoid')
axes[0, 0].plot(x, sigmoid_derivative(x), '--', label='Derivative')
axes[0, 0].set_title('Sigmoid and Derivative')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

axes[0, 1].plot(x, tanh(x), label='Tanh')
axes[0, 1].plot(x, tanh_derivative(x), '--', label='Derivative')
axes[0, 1].set_title('Tanh and Derivative')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

axes[1, 0].plot(x, relu(x), label='ReLU')
axes[1, 0].plot(x, relu_derivative(x), '--', label='Derivative')
axes[1, 0].set_title('ReLU and Derivative')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Softmax shown as a probability bar chart over one input vector
x_softmax = np.array([1, 2, 3, 4])
axes[1, 1].bar(range(4), softmax(x_softmax))
axes[1, 1].set_title(f'Softmax of {x_softmax}')
axes[1, 1].set_ylabel('Probability')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('numpy_activation_functions.png', dpi=100)
plt.close()
print("ํ™œ์„ฑํ™” ํ•จ์ˆ˜ ๊ทธ๋ž˜ํ”„ ์ €์žฅ: numpy_activation_functions.png")


# ============================================
# 2. Perceptron (a single neuron)
# ============================================
print("\n[2] ํผ์…‰ํŠธ๋ก  ๊ตฌํ˜„")
print("-" * 40)
102
class Perceptron:
    """A single neuron: weighted sum of inputs plus bias, squashed by a sigmoid."""

    def __init__(self, n_inputs):
        # Small random weights break symmetry; the bias starts at zero.
        self.weights = 0.1 * np.random.randn(n_inputs)
        self.bias = 0.0

    def forward(self, x):
        """Return sigmoid(w . x + b) for the input vector x."""
        pre_activation = x @ self.weights + self.bias
        return sigmoid(pre_activation)
115
# Quick check: push one 3-dimensional input through a fresh perceptron
perceptron = Perceptron(n_inputs=3)
x_input = np.array([1.0, 2.0, 3.0])
output = perceptron.forward(x_input)

print(f"์ž…๋ ฅ: {x_input}")
print(f"๊ฐ€์ค‘์น˜: {perceptron.weights}")
print(f"ํŽธํ–ฅ: {perceptron.bias}")
print(f"์ถœ๋ ฅ: {output:.4f}")


# ============================================
# 3. Multi-layer perceptron (MLP) forward pass
# ============================================
print("\n[3] MLP ์ˆœ์ „ํŒŒ ๊ตฌํ˜„")
print("-" * 40)
132
class MLPNumpy:
    """
    Multi-layer perceptron built from plain NumPy arrays.

    Forward pass only — backpropagation is covered in 03_backprop_scratch.py.
    """

    def __init__(self, layer_sizes):
        """
        layer_sizes: [input dim, hidden1, hidden2, ..., output dim]
        e.g. [784, 256, 128, 10] → 784 inputs, hidden layers 256/128, 10 outputs
        """
        self.num_layers = len(layer_sizes) - 1
        self.weights = []
        self.biases = []

        # Xavier/Glorot initialisation: std = sqrt(2 / (fan_in + fan_out)),
        # which keeps activation variance roughly constant across layers.
        for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
            scale = np.sqrt(2.0 / (fan_in + fan_out))
            self.weights.append(np.random.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros(fan_out))

        print(f"MLP ์ƒ์„ฑ: {layer_sizes}")
        for idx, (W, b) in enumerate(zip(self.weights, self.biases), start=1):
            print(f"  Layer {idx}: W{W.shape}, b{b.shape}")

    def forward(self, x):
        """Run the forward pass; returns (final output, list of all layer activations)."""
        activations = [x]
        current = x

        for layer in range(self.num_layers):
            z = current @ self.weights[layer] + self.biases[layer]
            # Hidden layers use ReLU; the final layer stays linear (raw logits).
            current = z if layer == self.num_layers - 1 else relu(z)
            activations.append(current)

        return activations[-1], activations

    def predict_proba(self, x):
        """Class probabilities: softmax over the network's output logits."""
        logits, _ = self.forward(x)
        return softmax(logits)

    def predict(self, x):
        """Predicted class index (argmax of the softmax probabilities)."""
        return np.argmax(self.predict_proba(x), axis=-1)
189
# MLP smoke test: a 10 → 32 → 16 → 3 network
mlp = MLPNumpy([10, 32, 16, 3])

# Batch input (4 samples, 10 features each)
x_batch = np.random.randn(4, 10)
output, activations = mlp.forward(x_batch)

print(f"\n์ž…๋ ฅ shape: {x_batch.shape}")
print(f"์ถœ๋ ฅ shape: {output.shape}")
print(f"์ถœ๋ ฅ ์˜ˆ์‹œ:\n{output}")

# Probabilities and hard class predictions for the same batch
proba = mlp.predict_proba(x_batch)
pred = mlp.predict(x_batch)
print(f"\nSoftmax ํ™•๋ฅ :\n{proba}")
print(f"์˜ˆ์ธก ํด๋ž˜์Šค: {pred}")


# ============================================
# 4. XOR problem — forward pass only
# ============================================
print("\n[4] XOR ๋ฌธ์ œ (์ˆœ์ „ํŒŒ๋งŒ)")
print("-" * 40)

# XOR truth table: output is 1 iff exactly one input is 1
X_xor = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_xor = np.array([0, 1, 1, 0])
217
218# ์ˆ˜๋™์œผ๋กœ ๊ฐ€์ค‘์น˜ ์„ค์ • (ํ•™์Šต ์—†์ด)
219# XOR์„ ํ•ด๊ฒฐํ•˜๋Š” ์ˆ˜๋™ ์„ค์ • ๊ฐ€์ค‘์น˜
220class XORNetManual:
221    def __init__(self):
222        # ์€๋‹‰์ธต: 2๊ฐœ ๋‰ด๋Ÿฐ
223        # ์ฒซ ๋ฒˆ์งธ ๋‰ด๋Ÿฐ: AND์ฒ˜๋Ÿผ ๋™์ž‘ (๋‘˜ ๋‹ค 1์ผ ๋•Œ)
224        # ๋‘ ๋ฒˆ์งธ ๋‰ด๋Ÿฐ: OR์ฒ˜๋Ÿผ ๋™์ž‘ (ํ•˜๋‚˜๋ผ๋„ 1์ผ ๋•Œ)
225        self.W1 = np.array([[ 20,  20],   # x1์— ๋Œ€ํ•œ ๊ฐ€์ค‘์น˜
226                           [ 20,  20]])   # x2์— ๋Œ€ํ•œ ๊ฐ€์ค‘์น˜
227        self.b1 = np.array([-30, -10])    # AND: -30, OR: -10
228
229        # ์ถœ๋ ฅ์ธต: OR - AND = XOR
230        self.W2 = np.array([[-20],        # AND ๋‰ด๋Ÿฐ์— ์Œ์ˆ˜
231                           [ 20]])        # OR ๋‰ด๋Ÿฐ์— ์–‘์ˆ˜
232        self.b2 = np.array([-10])
233
234    def forward(self, x):
235        z1 = x @ self.W1 + self.b1
236        a1 = sigmoid(z1)
237
238        z2 = a1 @ self.W2 + self.b2
239        a2 = sigmoid(z2)
240
241        return a2
242
xor_manual = XORNetManual()

print("์ˆ˜๋™ ์„ค์ • ๊ฐ€์ค‘์น˜๋กœ XOR ํ•ด๊ฒฐ:")
for i in range(4):
    x = X_xor[i:i+1]  # slice keeps the 2-D shape (1, 2) for the matrix multiply
    y_pred = xor_manual.forward(x)
    print(f"  {X_xor[i]} → {y_pred[0, 0]:.4f} (์ •๋‹ต: {y_xor[i]})")
250
251
# ============================================
# 5. Visualizing the forward pass
# ============================================
print("\n[5] ์ˆœ์ „ํŒŒ ๊ณผ์ • ์‹œ๊ฐํ™”")
print("-" * 40)
257
def visualize_forward_pass(x, model):
    """Trace x through `model`, printing each layer's pre- and post-activation values."""
    print(f"์ž…๋ ฅ: {x}")

    activation = x
    last = model.num_layers - 1
    for layer_idx in range(model.num_layers):
        z = activation @ model.weights[layer_idx] + model.biases[layer_idx]
        print(f"\nLayer {layer_idx+1}:")
        print(f"  z (์„ ํ˜• ๋ณ€ํ™˜): {z[:5]}...")  # show the first 5 components only

        if layer_idx == last:
            activation = z  # output layer is linear
            print(f"  ์ถœ๋ ฅ:           {activation}")
        else:
            activation = relu(z)
            print(f"  a (ReLU ํ›„):    {activation[:5]}...")

    return activation
276
277# ๋‹จ์ผ ์ƒ˜ํ”Œ๋กœ ํ…Œ์ŠคํŠธ
278small_mlp = MLPNumpy([4, 8, 3])
279x_single = np.array([1.0, 2.0, 3.0, 4.0])
280output = visualize_forward_pass(x_single, small_mlp)
281
282
283# ============================================
284# 6. NumPy vs PyTorch ๋น„๊ต
285# ============================================
286print("\n" + "=" * 60)
287print("NumPy vs PyTorch ๋น„๊ต")
288print("=" * 60)
289
290comparison = """
291| ํ•ญ๋ชฉ          | NumPy (์ด ์ฝ”๋“œ)           | PyTorch                    |
292|---------------|---------------------------|----------------------------|
293| ์ˆœ์ „ํŒŒ        | x @ W + b ์ง์ ‘ ๊ณ„์‚ฐ       | model(x) ์ž๋™ ๊ณ„์‚ฐ         |
294| ํ™œ์„ฑํ™” ํ•จ์ˆ˜   | np.maximum(0, x)          | F.relu(x)                  |
295| ๊ฐ€์ค‘์น˜ ๊ด€๋ฆฌ   | ๋ฆฌ์ŠคํŠธ๋กœ ์ง์ ‘ ๊ด€๋ฆฌ        | model.parameters()         |
296| ์—ญ์ „ํŒŒ        | โŒ (๋‹ค์Œ ๋ ˆ์Šจ์—์„œ ๊ตฌํ˜„)   | loss.backward() ์ž๋™       |
297| ๋ฐฐ์น˜ ์ฒ˜๋ฆฌ     | ํ–‰๋ ฌ ๊ณฑ์…ˆ์œผ๋กœ ์ง์ ‘        | DataLoader ์ž๋™            |
298
299NumPy ๊ตฌํ˜„์˜ ์žฅ์ :
3001. ์ˆœ์ „ํŒŒ์˜ ์ˆ˜ํ•™์  ์›๋ฆฌ ์™„์ „ ์ดํ•ด
3012. ํ–‰๋ ฌ ์—ฐ์‚ฐ์˜ ์˜๋ฏธ ํŒŒ์•…
3023. ํ™œ์„ฑํ™” ํ•จ์ˆ˜์˜ ๋™์ž‘ ์ดํ•ด
303
304๋‹ค์Œ ๋‹จ๊ณ„ (03_backprop_scratch.py):
305- ์—ญ์ „ํŒŒ ์•Œ๊ณ ๋ฆฌ์ฆ˜ NumPy ๊ตฌํ˜„
306- ๊ฒฝ์‚ฌ ํ•˜๊ฐ•๋ฒ•์œผ๋กœ ๊ฐ€์ค‘์น˜ ์—…๋ฐ์ดํŠธ
307- XOR ๋ฌธ์ œ ํ•™์Šต์œผ๋กœ ํ•ด๊ฒฐ
308"""
309print(comparison)
310
311print("NumPy ์‹ ๊ฒฝ๋ง ๊ธฐ์ดˆ (์ˆœ์ „ํŒŒ) ์™„๋ฃŒ!")
312print("PyTorch ๋ฒ„์ „๊ณผ ๋น„๊ต: examples/pytorch/02_neural_network.py")
313print("=" * 60)