1"""
208. RNN ๊ธฐ์ด (Recurrent Neural Networks)
3
4์ํ ์ ๊ฒฝ๋ง์ ๊ธฐ๋ณธ ๊ฐ๋
๊ณผ PyTorch ๊ตฌํ์ ํ์ตํฉ๋๋ค.
5"""
6
7import torch
8import torch.nn as nn
9import numpy as np
10import matplotlib.pyplot as plt
11
12print("=" * 60)
13print("PyTorch RNN ๊ธฐ์ด")
14print("=" * 60)
15
16device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
17print(f"์ฌ์ฉ ์ฅ์น: {device}")
18
19
20# ============================================
21# 1. RNN ๊ธฐ๋ณธ ์ดํด
22# ============================================
23print("\n[1] RNN ๊ธฐ๋ณธ ์ดํด")
24print("-" * 40)
25
26# ๋จ์ RNN ์
์๋ ๊ตฌํ
27class SimpleRNNCell:
28 """RNN ์
์๋ ๊ตฌํ (์ดํด์ฉ)"""
29 def __init__(self, input_size, hidden_size):
30 # ๊ฐ์ค์น ์ด๊ธฐํ
31 self.W_xh = np.random.randn(input_size, hidden_size) * 0.1
32 self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.1
33 self.b = np.zeros(hidden_size)
34
35 def forward(self, x, h_prev):
36 """
37 x: ํ์ฌ ์
๋ ฅ (input_size,)
38 h_prev: ์ด์ ์๋ ์ํ (hidden_size,)
39 """
40 h_new = np.tanh(x @ self.W_xh + h_prev @ self.W_hh + self.b)
41 return h_new
42
43# ํ
์คํธ
44cell = SimpleRNNCell(input_size=3, hidden_size=5)
45h = np.zeros(5)
46
47print("์๋ RNN ์
์์ ํ:")
48for t in range(4):
49 x = np.random.randn(3)
50 h = cell.forward(x, h)
51 print(f" t={t}: h = {h[:3]}...")
52
53
54# ============================================
55# 2. PyTorch nn.RNN
56# ============================================
57print("\n[2] PyTorch nn.RNN")
58print("-" * 40)
59
60# RNN ๋ ์ด์ด ์์ฑ
61rnn = nn.RNN(
62 input_size=10, # ์
๋ ฅ ํน์ฑ ์ฐจ์
63 hidden_size=20, # ์๋ ์ํ ์ฐจ์
64 num_layers=2, # RNN ์ธต ์
65 batch_first=True, # ์
๋ ฅ: (batch, seq, feature)
66 dropout=0.1 # ์ธต ๊ฐ ๋๋กญ์์
67)
68
69# ์
๋ ฅ ์์ฑ
70batch_size = 4
71seq_len = 8
72x = torch.randn(batch_size, seq_len, 10)
73
74# ์์ ํ
75output, h_n = rnn(x)
76
77print(f"์
๋ ฅ: {x.shape}")
78print(f"output (๋ชจ๋ ์๊ฐ ์๋์ํ): {output.shape}")
79print(f"h_n (๋ง์ง๋ง ์๋์ํ): {h_n.shape}")
80
81# ์ด๊ธฐ ์๋ ์ํ ์ง์
82h0 = torch.zeros(2, batch_size, 20) # (num_layers, batch, hidden)
83output, h_n = rnn(x, h0)
84print(f"\n์ด๊ธฐ ์ํ ์ง์ : h0 shape = {h0.shape}")
85
86
87# ============================================
88# 3. ์๋ฐฉํฅ RNN
89# ============================================
90print("\n[3] ์๋ฐฉํฅ RNN")
91print("-" * 40)
92
93rnn_bi = nn.RNN(
94 input_size=10,
95 hidden_size=20,
96 num_layers=1,
97 batch_first=True,
98 bidirectional=True
99)
100
101output_bi, h_n_bi = rnn_bi(x)
102
103print(f"์๋ฐฉํฅ RNN:")
104print(f" output: {output_bi.shape}") # (batch, seq, hidden*2)
105print(f" h_n: {h_n_bi.shape}") # (2, batch, hidden)
106
107# ์ ๋ฐฉํฅ/์ญ๋ฐฉํฅ ๋ถ๋ฆฌ
108forward_out = output_bi[:, :, :20]
109backward_out = output_bi[:, :, 20:]
110print(f" ์ ๋ฐฉํฅ ์ถ๋ ฅ: {forward_out.shape}")
111print(f" ์ญ๋ฐฉํฅ ์ถ๋ ฅ: {backward_out.shape}")
112
113
114# ============================================
115# 4. RNN ๋ถ๋ฅ๊ธฐ
116# ============================================
117print("\n[4] RNN ๋ถ๋ฅ๊ธฐ")
118print("-" * 40)
119
120class RNNClassifier(nn.Module):
121 """์ํ์ค ๋ถ๋ฅ์ฉ RNN"""
122 def __init__(self, input_size, hidden_size, num_classes, num_layers=1):
123 super().__init__()
124 self.hidden_size = hidden_size
125 self.num_layers = num_layers
126
127 self.rnn = nn.RNN(
128 input_size, hidden_size,
129 num_layers=num_layers,
130 batch_first=True,
131 dropout=0.2 if num_layers > 1 else 0
132 )
133 self.fc = nn.Linear(hidden_size, num_classes)
134
135 def forward(self, x):
136 # x: (batch, seq, features)
137 output, h_n = self.rnn(x)
138
139 # ๋ง์ง๋ง ์ธต์ ๋ง์ง๋ง ์๊ฐ ์๋ ์ํ
140 last_hidden = h_n[-1] # (batch, hidden)
141 out = self.fc(last_hidden)
142 return out
143
144# ํ
์คํธ
145model = RNNClassifier(input_size=10, hidden_size=32, num_classes=5)
146x = torch.randn(8, 15, 10) # 8 ์ํ, 15 ์คํ
, 10 ํน์ฑ
147out = model(x)
148print(f"๋ถ๋ฅ๊ธฐ ์
๋ ฅ: {x.shape}")
149print(f"๋ถ๋ฅ๊ธฐ ์ถ๋ ฅ: {out.shape}")
150
151
152# ============================================
153# 5. ์๊ณ์ด ์์ธก (์ฌ์ธํ)
154# ============================================
155print("\n[5] ์๊ณ์ด ์์ธก (์ฌ์ธํ)")
156print("-" * 40)
157
158# ๋ฐ์ดํฐ ์์ฑ
159def generate_sin_data(seq_len=50, n_samples=1000):
160 X = []
161 y = []
162 for _ in range(n_samples):
163 start = np.random.uniform(0, 2*np.pi)
164 seq = np.sin(np.linspace(start, start + 4*np.pi, seq_len + 1))
165 X.append(seq[:-1].reshape(-1, 1))
166 y.append(seq[-1])
167 return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)
168
169X_train, y_train = generate_sin_data(seq_len=50, n_samples=1000)
170X_test, y_test = generate_sin_data(seq_len=50, n_samples=200)
171
172X_train = torch.from_numpy(X_train)
173y_train = torch.from_numpy(y_train)
174X_test = torch.from_numpy(X_test)
175y_test = torch.from_numpy(y_test)
176
177print(f"ํ๋ จ ๋ฐ์ดํฐ: X={X_train.shape}, y={y_train.shape}")
178print(f"ํ
์คํธ ๋ฐ์ดํฐ: X={X_test.shape}, y={y_test.shape}")
179
180# ๋ชจ๋ธ
181class SinPredictor(nn.Module):
182 def __init__(self, hidden_size=32):
183 super().__init__()
184 self.rnn = nn.RNN(1, hidden_size, batch_first=True)
185 self.fc = nn.Linear(hidden_size, 1)
186
187 def forward(self, x):
188 _, h_n = self.rnn(x)
189 return self.fc(h_n[-1]).squeeze(-1)
190
191model = SinPredictor(hidden_size=32).to(device)
192criterion = nn.MSELoss()
193optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
194
195# ํ์ต
196from torch.utils.data import DataLoader, TensorDataset
197
198train_loader = DataLoader(
199 TensorDataset(X_train, y_train),
200 batch_size=64, shuffle=True
201)
202
203losses = []
204for epoch in range(50):
205 model.train()
206 epoch_loss = 0
207 for X_batch, y_batch in train_loader:
208 X_batch = X_batch.to(device)
209 y_batch = y_batch.to(device)
210
211 pred = model(X_batch)
212 loss = criterion(pred, y_batch)
213
214 optimizer.zero_grad()
215 loss.backward()
216
217 # ๊ธฐ์ธ๊ธฐ ํด๋ฆฌํ
218 torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
219
220 optimizer.step()
221 epoch_loss += loss.item()
222
223 losses.append(epoch_loss / len(train_loader))
224
225 if (epoch + 1) % 10 == 0:
226 print(f"Epoch {epoch+1}: Loss = {losses[-1]:.6f}")
227
228# ํ
์คํธ
229model.eval()
230with torch.no_grad():
231 X_test_dev = X_test.to(device)
232 pred_test = model(X_test_dev)
233 test_loss = criterion(pred_test, y_test.to(device))
234 print(f"\nํ
์คํธ MSE: {test_loss.item():.6f}")
235
236# ์๊ฐํ
237plt.figure(figsize=(12, 4))
238
239plt.subplot(1, 2, 1)
240plt.plot(losses)
241plt.xlabel('Epoch')
242plt.ylabel('Loss')
243plt.title('Training Loss')
244plt.grid(True, alpha=0.3)
245
246plt.subplot(1, 2, 2)
247plt.scatter(y_test.numpy()[:100], pred_test.cpu().numpy()[:100], alpha=0.5)
248plt.plot([-1, 1], [-1, 1], 'r--')
249plt.xlabel('True')
250plt.ylabel('Predicted')
251plt.title('Prediction vs True')
252plt.grid(True, alpha=0.3)
253
254plt.tight_layout()
255plt.savefig('rnn_sin_prediction.png', dpi=100)
256plt.close()
257print("๊ทธ๋ํ ์ ์ฅ: rnn_sin_prediction.png")
258
259
260# ============================================
261# 6. Many-to-Many RNN
262# ============================================
263print("\n[6] Many-to-Many RNN")
264print("-" * 40)
265
266class Seq2SeqRNN(nn.Module):
267 """์ํ์ค โ ์ํ์ค"""
268 def __init__(self, input_size, hidden_size, output_size):
269 super().__init__()
270 self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
271 self.fc = nn.Linear(hidden_size, output_size)
272
273 def forward(self, x):
274 output, _ = self.rnn(x)
275 # ๋ชจ๋ ์๊ฐ ๋จ๊ณ์ FC ์ ์ฉ
276 out = self.fc(output)
277 return out
278
279model_s2s = Seq2SeqRNN(10, 20, 5)
280x = torch.randn(4, 8, 10)
281out = model_s2s(x)
282print(f"Seq2Seq ์
๋ ฅ: {x.shape}")
283print(f"Seq2Seq ์ถ๋ ฅ: {out.shape}") # (4, 8, 5)
284
285
286# ============================================
287# 7. ๊ฐ๋ณ ๊ธธ์ด ์ํ์ค ์ฒ๋ฆฌ
288# ============================================
289print("\n[7] ๊ฐ๋ณ ๊ธธ์ด ์ํ์ค")
290print("-" * 40)
291
292from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
293
294# ๋ค์ํ ๊ธธ์ด์ ์ํ์ค (ํจ๋ฉ๋จ)
295sequences = [
296 torch.randn(5, 10), # ๊ธธ์ด 5
297 torch.randn(3, 10), # ๊ธธ์ด 3
298 torch.randn(7, 10), # ๊ธธ์ด 7
299]
300lengths = torch.tensor([5, 3, 7])
301
302# ํจ๋ฉ (๊ฐ์ฅ ๊ธด ์ํ์ค์ ๋ง์ถค)
303max_len = max(lengths)
304padded = torch.zeros(3, max_len, 10)
305for i, seq in enumerate(sequences):
306 padded[i, :len(seq)] = seq
307
308print(f"ํจ๋ฉ๋ ์ํ์ค: {padded.shape}")
309print(f"์ค์ ๊ธธ์ด: {lengths}")
310
311# ํจํน
312rnn = nn.RNN(10, 20, batch_first=True)
313packed = pack_padded_sequence(padded, lengths, batch_first=True, enforce_sorted=False)
314packed_output, h_n = rnn(packed)
315
316# ์ธํจํน
317output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
318print(f"์ธํจํน๋ ์ถ๋ ฅ: {output.shape}")
319
320
321# ============================================
322# 8. ๊ธฐ์ธ๊ธฐ ์์ค ์์ฐ
323# ============================================
324print("\n[8] ๊ธฐ์ธ๊ธฐ ์์ค ์์ฐ")
325print("-" * 40)
326
327def check_gradients(model, seq_len):
328 """์ํ์ค ๊ธธ์ด์ ๋ฐ๋ฅธ ๊ธฐ์ธ๊ธฐ ํ์ธ"""
329 model.train()
330 x = torch.randn(1, seq_len, 1, requires_grad=True)
331 output, h_n = model.rnn(x)
332 loss = h_n.sum()
333 loss.backward()
334
335 # ์ฒซ ๋ฒ์งธ ๊ฐ์ค์น์ ๊ธฐ์ธ๊ธฐ ํฌ๊ธฐ
336 grad_norm = model.rnn.weight_ih_l0.grad.norm().item()
337 return grad_norm
338
339model = SinPredictor(hidden_size=32)
340
341print("์ํ์ค ๊ธธ์ด์ ๋ฐ๋ฅธ ๊ธฐ์ธ๊ธฐ ํฌ๊ธฐ:")
342for seq_len in [10, 50, 100, 200]:
343 grad = check_gradients(model, seq_len)
344 print(f" ๊ธธ์ด {seq_len:3d}: ๊ธฐ์ธ๊ธฐ norm = {grad:.6f}")
345
346
347# ============================================
348# ์ ๋ฆฌ
349# ============================================
350print("\n" + "=" * 60)
351print("RNN ๊ธฐ์ด ์ ๋ฆฌ")
352print("=" * 60)
353
354summary = """
355RNN ํต์ฌ:
356 h(t) = tanh(W_xh ร x(t) + W_hh ร h(t-1) + b)
357
358PyTorch RNN:
359 rnn = nn.RNN(input_size, hidden_size, batch_first=True)
360 output, h_n = rnn(x)
361 # output: (batch, seq, hidden) - ๋ชจ๋ ์๊ฐ
362 # h_n: (layers, batch, hidden) - ๋ง์ง๋ง๋ง
363
364๋ถ๋ฅ ํจํด:
365 # ๋ง์ง๋ง ์๋ ์ํ ์ฌ์ฉ
366 output = fc(h_n[-1])
367
368Seq2Seq ํจํด:
369 # ๋ชจ๋ ์๊ฐ ์๋ ์ํ ์ฌ์ฉ
370 output = fc(rnn_output)
371
372์ฃผ์์ฌํญ:
3731. ๊ธฐ์ธ๊ธฐ ํด๋ฆฌํ ์ฌ์ฉ
3742. ๊ธด ์ํ์ค โ LSTM/GRU ์ฌ์ฉ
3753. batch_first ํ์ธ
3764. ๊ฐ๋ณ ๊ธธ์ด โ pack_padded_sequence
377"""
378print(summary)
379print("=" * 60)