# 08_rnn_basic.py
  1"""
  208. RNN ๊ธฐ์ดˆ (Recurrent Neural Networks)
  3
  4์ˆœํ™˜ ์‹ ๊ฒฝ๋ง์˜ ๊ธฐ๋ณธ ๊ฐœ๋…๊ณผ PyTorch ๊ตฌํ˜„์„ ํ•™์Šตํ•ฉ๋‹ˆ๋‹ค.
  5"""
  6
  7import torch
  8import torch.nn as nn
  9import numpy as np
 10import matplotlib.pyplot as plt
 11
 12print("=" * 60)
 13print("PyTorch RNN ๊ธฐ์ดˆ")
 14print("=" * 60)
 15
 16device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 17print(f"์‚ฌ์šฉ ์žฅ์น˜: {device}")
 18
 19
 20# ============================================
 21# 1. RNN ๊ธฐ๋ณธ ์ดํ•ด
 22# ============================================
 23print("\n[1] RNN ๊ธฐ๋ณธ ์ดํ•ด")
 24print("-" * 40)
 25
 26# ๋‹จ์ˆœ RNN ์…€ ์ˆ˜๋™ ๊ตฌํ˜„
 27class SimpleRNNCell:
 28    """RNN ์…€ ์ˆ˜๋™ ๊ตฌํ˜„ (์ดํ•ด์šฉ)"""
 29    def __init__(self, input_size, hidden_size):
 30        # ๊ฐ€์ค‘์น˜ ์ดˆ๊ธฐํ™”
 31        self.W_xh = np.random.randn(input_size, hidden_size) * 0.1
 32        self.W_hh = np.random.randn(hidden_size, hidden_size) * 0.1
 33        self.b = np.zeros(hidden_size)
 34
 35    def forward(self, x, h_prev):
 36        """
 37        x: ํ˜„์žฌ ์ž…๋ ฅ (input_size,)
 38        h_prev: ์ด์ „ ์€๋‹‰ ์ƒํƒœ (hidden_size,)
 39        """
 40        h_new = np.tanh(x @ self.W_xh + h_prev @ self.W_hh + self.b)
 41        return h_new
 42
 43# ํ…Œ์ŠคํŠธ
 44cell = SimpleRNNCell(input_size=3, hidden_size=5)
 45h = np.zeros(5)
 46
 47print("์ˆ˜๋™ RNN ์…€ ์ˆœ์ „ํŒŒ:")
 48for t in range(4):
 49    x = np.random.randn(3)
 50    h = cell.forward(x, h)
 51    print(f"  t={t}: h = {h[:3]}...")
 52
 53
 54# ============================================
 55# 2. PyTorch nn.RNN
 56# ============================================
 57print("\n[2] PyTorch nn.RNN")
 58print("-" * 40)
 59
 60# RNN ๋ ˆ์ด์–ด ์ƒ์„ฑ
 61rnn = nn.RNN(
 62    input_size=10,    # ์ž…๋ ฅ ํŠน์„ฑ ์ฐจ์›
 63    hidden_size=20,   # ์€๋‹‰ ์ƒํƒœ ์ฐจ์›
 64    num_layers=2,     # RNN ์ธต ์ˆ˜
 65    batch_first=True, # ์ž…๋ ฅ: (batch, seq, feature)
 66    dropout=0.1       # ์ธต ๊ฐ„ ๋“œ๋กญ์•„์›ƒ
 67)
 68
 69# ์ž…๋ ฅ ์ƒ์„ฑ
 70batch_size = 4
 71seq_len = 8
 72x = torch.randn(batch_size, seq_len, 10)
 73
 74# ์ˆœ์ „ํŒŒ
 75output, h_n = rnn(x)
 76
 77print(f"์ž…๋ ฅ: {x.shape}")
 78print(f"output (๋ชจ๋“  ์‹œ๊ฐ„ ์€๋‹‰์ƒํƒœ): {output.shape}")
 79print(f"h_n (๋งˆ์ง€๋ง‰ ์€๋‹‰์ƒํƒœ): {h_n.shape}")
 80
 81# ์ดˆ๊ธฐ ์€๋‹‰ ์ƒํƒœ ์ง€์ •
 82h0 = torch.zeros(2, batch_size, 20)  # (num_layers, batch, hidden)
 83output, h_n = rnn(x, h0)
 84print(f"\n์ดˆ๊ธฐ ์ƒํƒœ ์ง€์ •: h0 shape = {h0.shape}")
 85
 86
 87# ============================================
 88# 3. ์–‘๋ฐฉํ–ฅ RNN
 89# ============================================
 90print("\n[3] ์–‘๋ฐฉํ–ฅ RNN")
 91print("-" * 40)
 92
 93rnn_bi = nn.RNN(
 94    input_size=10,
 95    hidden_size=20,
 96    num_layers=1,
 97    batch_first=True,
 98    bidirectional=True
 99)
100
101output_bi, h_n_bi = rnn_bi(x)
102
103print(f"์–‘๋ฐฉํ–ฅ RNN:")
104print(f"  output: {output_bi.shape}")  # (batch, seq, hidden*2)
105print(f"  h_n: {h_n_bi.shape}")        # (2, batch, hidden)
106
107# ์ •๋ฐฉํ–ฅ/์—ญ๋ฐฉํ–ฅ ๋ถ„๋ฆฌ
108forward_out = output_bi[:, :, :20]
109backward_out = output_bi[:, :, 20:]
110print(f"  ์ •๋ฐฉํ–ฅ ์ถœ๋ ฅ: {forward_out.shape}")
111print(f"  ์—ญ๋ฐฉํ–ฅ ์ถœ๋ ฅ: {backward_out.shape}")
112
113
# ============================================
# 4. RNN classifier
# ============================================
print("\n[4] RNN ๋ถ„๋ฅ˜๊ธฐ")
print("-" * 40)

class RNNClassifier(nn.Module):
    """Sequence classifier: an RNN encoder followed by a linear head.

    Args:
        input_size: number of features per time step.
        hidden_size: RNN hidden-state dimension.
        num_classes: number of output classes (logit dimension).
        num_layers: number of stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_layers=1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(
            input_size, hidden_size,
            num_layers=num_layers,
            batch_first=True,
            # Inter-layer dropout only makes sense with >1 layer
            # (PyTorch warns otherwise).
            dropout=0.2 if num_layers > 1 else 0
        )
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: input batch of shape (batch, seq, features).
        """
        output, h_n = self.rnn(x)
        # Classify from the last layer's final-time-step hidden state.
        last_hidden = h_n[-1]  # (batch, hidden)
        return self.fc(last_hidden)

# Smoke test.
model = RNNClassifier(input_size=10, hidden_size=32, num_classes=5)
x = torch.randn(8, 15, 10)  # 8 samples, 15 steps, 10 features
out = model(x)
print(f"๋ถ„๋ฅ˜๊ธฐ ์ž…๋ ฅ: {x.shape}")
print(f"๋ถ„๋ฅ˜๊ธฐ ์ถœ๋ ฅ: {out.shape}")

# ============================================
# 5. Time-series prediction (sine wave)
# ============================================
print("\n[5] ์‹œ๊ณ„์—ด ์˜ˆ์ธก (์‚ฌ์ธํŒŒ)")
print("-" * 40)

def generate_sin_data(seq_len=50, n_samples=1000):
    """Build a next-value prediction dataset from random sine segments.

    Each sample spans two full periods starting at a random phase; the
    first ``seq_len`` points form the input and the following point is
    the regression target.

    Args:
        seq_len: number of input time steps per sample.
        n_samples: number of samples to generate.

    Returns:
        X: float32 array of shape (n_samples, seq_len, 1)
        y: float32 array of shape (n_samples,)
    """
    X, y = [], []
    for _ in range(n_samples):
        start = np.random.uniform(0, 2 * np.pi)
        seq = np.sin(np.linspace(start, start + 4 * np.pi, seq_len + 1))
        X.append(seq[:-1].reshape(-1, 1))
        y.append(seq[-1])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

X_train, y_train = generate_sin_data(seq_len=50, n_samples=1000)
X_test, y_test = generate_sin_data(seq_len=50, n_samples=200)

X_train = torch.from_numpy(X_train)
y_train = torch.from_numpy(y_train)
X_test = torch.from_numpy(X_test)
y_test = torch.from_numpy(y_test)

print(f"ํ›ˆ๋ จ ๋ฐ์ดํ„ฐ: X={X_train.shape}, y={y_train.shape}")
print(f"ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ: X={X_test.shape}, y={y_test.shape}")

# Model: single-layer RNN encoder with a scalar regression head.
class SinPredictor(nn.Module):
    """Predict the next value of a univariate sequence."""

    def __init__(self, hidden_size=32):
        super().__init__()
        self.rnn = nn.RNN(1, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map x of shape (batch, seq, 1) to predictions of shape (batch,)."""
        _, h_n = self.rnn(x)
        # Regress from the final hidden state; squeeze the size-1 output dim.
        return self.fc(h_n[-1]).squeeze(-1)

model = SinPredictor(hidden_size=32).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop.
from torch.utils.data import DataLoader, TensorDataset

train_loader = DataLoader(
    TensorDataset(X_train, y_train),
    batch_size=64, shuffle=True
)

losses = []
for epoch in range(50):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)

        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        optimizer.zero_grad()
        loss.backward()

        # Gradient clipping guards against exploding gradients in RNNs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        epoch_loss += loss.item()

    # Track the mean batch loss per epoch.
    losses.append(epoch_loss / len(train_loader))

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}: Loss = {losses[-1]:.6f}")

# Evaluation on held-out data (no gradient tracking needed).
model.eval()
with torch.no_grad():
    X_test_dev = X_test.to(device)
    pred_test = model(X_test_dev)
    test_loss = criterion(pred_test, y_test.to(device))
    print(f"\nํ…Œ์ŠคํŠธ MSE: {test_loss.item():.6f}")

# Visualization: training curve and predicted-vs-true scatter.
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
plt.scatter(y_test.numpy()[:100], pred_test.cpu().numpy()[:100], alpha=0.5)
plt.plot([-1, 1], [-1, 1], 'r--')  # ideal y = x reference line
plt.xlabel('True')
plt.ylabel('Predicted')
plt.title('Prediction vs True')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('rnn_sin_prediction.png', dpi=100)
plt.close()
print("๊ทธ๋ž˜ํ”„ ์ €์žฅ: rnn_sin_prediction.png")

# ============================================
# 6. Many-to-many RNN
# ============================================
print("\n[6] Many-to-Many RNN")
print("-" * 40)

class Seq2SeqRNN(nn.Module):
    """Sequence-to-sequence mapping: one output vector per time step."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map x of shape (batch, seq, input_size) to (batch, seq, output_size)."""
        output, _ = self.rnn(x)
        # The linear head is applied independently at every time step.
        return self.fc(output)

model_s2s = Seq2SeqRNN(10, 20, 5)
x = torch.randn(4, 8, 10)
out = model_s2s(x)
print(f"Seq2Seq ์ž…๋ ฅ: {x.shape}")
print(f"Seq2Seq ์ถœ๋ ฅ: {out.shape}")  # (4, 8, 5)

# ============================================
# 7. Variable-length sequences
# ============================================
print("\n[7] ๊ฐ€๋ณ€ ๊ธธ์ด ์‹œํ€€์Šค")
print("-" * 40)

from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# Sequences of different lengths (to be padded).
sequences = [
    torch.randn(5, 10),   # length 5
    torch.randn(3, 10),   # length 3
    torch.randn(7, 10),   # length 7
]
lengths = torch.tensor([5, 3, 7])

# Zero-pad every sequence up to the longest one.
max_len = max(lengths)
padded = torch.zeros(3, max_len, 10)
for i, seq in enumerate(sequences):
    padded[i, :len(seq)] = seq

print(f"ํŒจ๋”ฉ๋œ ์‹œํ€€์Šค: {padded.shape}")
print(f"์‹ค์ œ ๊ธธ์ด: {lengths}")

# Packing lets the RNN skip the padded positions entirely;
# enforce_sorted=False allows lengths in arbitrary order.
rnn = nn.RNN(10, 20, batch_first=True)
packed = pack_padded_sequence(padded, lengths, batch_first=True, enforce_sorted=False)
packed_output, h_n = rnn(packed)

# Unpack back to a padded tensor; the original batch order is restored.
output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
print(f"์–ธํŒจํ‚น๋œ ์ถœ๋ ฅ: {output.shape}")

# ============================================
# 8. Vanishing-gradient demonstration
# ============================================
print("\n[8] ๊ธฐ์šธ๊ธฐ ์†Œ์‹ค ์‹œ์—ฐ")
print("-" * 40)

def check_gradients(model, seq_len):
    """Measure the input-weight gradient norm for one backward pass.

    Args:
        model: a module exposing an ``rnn`` attribute (an ``nn.RNN``
            with input size 1).
        seq_len: length of the random input sequence.

    Returns:
        L2 norm of ``weight_ih_l0.grad`` after backpropagating from the
        final hidden state.
    """
    model.train()
    # BUG FIX: clear gradients left over from any previous call;
    # .backward() accumulates into .grad, so without this the norms
    # grow with every call and the length comparison below is skewed.
    model.zero_grad(set_to_none=True)
    x = torch.randn(1, seq_len, 1, requires_grad=True)
    output, h_n = model.rnn(x)
    loss = h_n.sum()
    loss.backward()

    # Gradient magnitude of the input-to-hidden weights.
    return model.rnn.weight_ih_l0.grad.norm().item()

model = SinPredictor(hidden_size=32)

# Compare gradient magnitudes as the sequence gets longer — with a
# vanilla RNN they shrink, illustrating the vanishing-gradient problem.
print("์‹œํ€€์Šค ๊ธธ์ด์— ๋”ฐ๋ฅธ ๊ธฐ์šธ๊ธฐ ํฌ๊ธฐ:")
for seq_len in [10, 50, 100, 200]:
    grad = check_gradients(model, seq_len)
    print(f"  ๊ธธ์ด {seq_len:3d}: ๊ธฐ์šธ๊ธฐ norm = {grad:.6f}")

# ============================================
# Summary
# ============================================
print("\n" + "=" * 60)
print("RNN ๊ธฐ์ดˆ ์ •๋ฆฌ")
print("=" * 60)

summary = """
RNN ํ•ต์‹ฌ:
    h(t) = tanh(W_xh ร— x(t) + W_hh ร— h(t-1) + b)

PyTorch RNN:
    rnn = nn.RNN(input_size, hidden_size, batch_first=True)
    output, h_n = rnn(x)
    # output: (batch, seq, hidden) - ๋ชจ๋“  ์‹œ๊ฐ„
    # h_n: (layers, batch, hidden) - ๋งˆ์ง€๋ง‰๋งŒ

๋ถ„๋ฅ˜ ํŒจํ„ด:
    # ๋งˆ์ง€๋ง‰ ์€๋‹‰ ์ƒํƒœ ์‚ฌ์šฉ
    output = fc(h_n[-1])

Seq2Seq ํŒจํ„ด:
    # ๋ชจ๋“  ์‹œ๊ฐ„ ์€๋‹‰ ์ƒํƒœ ์‚ฌ์šฉ
    output = fc(rnn_output)

์ฃผ์˜์‚ฌํ•ญ:
1. ๊ธฐ์šธ๊ธฐ ํด๋ฆฌํ•‘ ์‚ฌ์šฉ
2. ๊ธด ์‹œํ€€์Šค โ†’ LSTM/GRU ์‚ฌ์šฉ
3. batch_first ํ™•์ธ
4. ๊ฐ€๋ณ€ ๊ธธ์ด โ†’ pack_padded_sequence
"""
print(summary)
print("=" * 60)