% presentation.tex
% Complete Beamer presentation demonstrating various features
% Showcases slides, blocks, columns, lists, math, and figures

\documentclass[aspectratio=169]{beamer}

% Theme selection
\usetheme{Madrid}
\usecolortheme{dolphin}

% Packages
\usepackage[T1]{fontenc}  % T1 encoding: correct hyphenation and copyable PDF text
\usepackage{lmodern}      % Latin Modern fonts (T1-complete Computer Modern replacement)
\usepackage{graphicx}
\usepackage{amsmath, amssymb}
\usepackage{listings}
\usepackage{tikz}
\usetikzlibrary{shapes, arrows.meta, positioning}

% Listings configuration for code
\lstset{
  basicstyle=\ttfamily\small,
  keywordstyle=\color{blue},
  commentstyle=\color{green!60!black},
  stringstyle=\color{red},
  numbers=left,
  numberstyle=\tiny,
  frame=single,
  breaklines=true
}

% Title information
\title[Deep Learning Intro]{Introduction to Deep Learning}
\subtitle{Neural Networks and Applications}
\author[J. Smith]{John Smith}
\institute[MIT]{% trailing % suppresses a spurious space token
  Department of Computer Science\\
  Massachusetts Institute of Technology
}
\date{\today}

% Logo (uncomment if you have a logo file)
% \logo{\includegraphics[height=0.8cm]{logo.png}}
\begin{document}

% ==================== Title Slide ====================
\begin{frame}
  \titlepage
\end{frame}
49
% ==================== Outline Slide ====================
\begin{frame}{Outline}
  \tableofcontents
\end{frame}
54
% ==================== Section 1 ====================
\section{Introduction}

\begin{frame}{What is Deep Learning?}
  \begin{block}{Definition}
    Deep Learning is a subset of machine learning that uses artificial neural
    networks with multiple layers to progressively extract higher-level features
    from raw input.
  \end{block}

  \vspace{0.5cm}

  \begin{columns}[c]
    \column{0.5\textwidth}
    \textbf{Key Characteristics:}
    \begin{itemize}
      \item Multiple layers of processing
      \item Automatic feature learning
      \item End-to-end learning
      \item Hierarchical representations
    \end{itemize}

    \column{0.5\textwidth}
    \textbf{Applications:}
    \begin{itemize}
      \item Computer Vision
      \item Natural Language Processing
      \item Speech Recognition
      \item Game Playing (AlphaGo)
    \end{itemize}
  \end{columns}
\end{frame}
87
\begin{frame}{History of Neural Networks}
  \begin{itemize}
    \item \textbf{1943:} McCulloch-Pitts neuron model
    \item \textbf{1958:} Perceptron (Rosenblatt)
    \item \textbf{1986:} Backpropagation popularized (Rumelhart et al.)
    \item \textbf{1998:} LeNet-5 for digit recognition (LeCun)
    \item \textbf{2012:} AlexNet wins ImageNet (Krizhevsky et al.)
    \item \textbf{2017:} Transformer architecture (Vaswani et al.)
    \item \textbf{2020s:} Large Language Models (GPT, BERT, etc.)
  \end{itemize}

  \pause

  \begin{alertblock}{Key Breakthrough}
    The availability of large datasets, powerful GPUs, and improved algorithms
    led to the deep learning revolution in the 2010s.
  \end{alertblock}
\end{frame}
106
% ==================== Section 2 ====================
\section{Neural Network Fundamentals}

\begin{frame}{The Artificial Neuron}
  \begin{columns}[c]
    \column{0.6\textwidth}
    \begin{tikzpicture}[
      neuron/.style={circle, draw=black, thick, minimum size=1.2cm, fill=blue!20},
      arrow/.style={->,>=Stealth, thick}
    ]
      % Inputs x_1..x_3 with their weight labels; dots elide x_4..x_{n-1}
      \foreach \i in {1,2,3} {
        \node (x\i) at (0, -\i) {$x_\i$};
        \node (w\i) at (1.5, -\i+0.3) {\small $w_\i$};
      }
      \node at (0, -4) {$\vdots$};
      \node (xn) at (0, -5) {$x_n$};
      \node (wn) at (1.5, -5+0.3) {\small $w_n$};

      % Neuron body: weighted sum followed by activation
      \node[neuron] (neuron) at (4, -3) {$\sum, \sigma$};

      % Output
      \node (output) at (6.5, -3) {$y$};

      % Connections
      \foreach \i in {1,2,3} {
        \draw[arrow] (x\i) -- (neuron);
      }
      \draw[arrow] (xn) -- (neuron);
      \draw[arrow] (neuron) -- (output);

      % Bias
      \node (bias) at (4, -1) {$b$};
      \draw[arrow] (bias) -- (neuron);
    \end{tikzpicture}

    \column{0.4\textwidth}
    \textbf{Mathematical Model:}
    \begin{equation*}
      y = \sigma\left(\sum_{i=1}^{n} w_i x_i + b\right)
    \end{equation*}

    Where:
    \begin{itemize}
      \item $x_i$: inputs
      \item $w_i$: weights
      \item $b$: bias
      \item $\sigma$: activation function
    \end{itemize}
  \end{columns}
\end{frame}
159
\begin{frame}{Activation Functions}
  \begin{columns}[t]
    \column{0.33\textwidth}
    \centering
    \textbf{Sigmoid}
    \begin{equation*}
      \sigma(x) = \frac{1}{1 + e^{-x}}
    \end{equation*}
    \vspace{0.2cm}
    \begin{itemize}
      \item Range: $(0, 1)$
      \item Smooth gradient
      \item Vanishing gradient issue
    \end{itemize}

    \column{0.33\textwidth}
    \centering
    \textbf{ReLU}
    \begin{equation*}
      f(x) = \max(0, x)
    \end{equation*}
    \vspace{0.2cm}
    \begin{itemize}
      \item Range: $[0, \infty)$
      \item Most popular
      \item Computational efficiency
    \end{itemize}

    \column{0.33\textwidth}
    \centering
    \textbf{Tanh}
    \begin{equation*}
      \tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}
    \end{equation*}
    \vspace{0.2cm}
    \begin{itemize}
      \item Range: $(-1, 1)$
      \item Zero-centered
      \item Similar issues as sigmoid
    \end{itemize}
  \end{columns}
\end{frame}
202
% [fragile] removed: the frame contains no verbatim material, so it is unnecessary
\begin{frame}{Training Neural Networks}
  \begin{block}{Backpropagation Algorithm}
    The core algorithm for training neural networks, using gradient descent
    to minimize the loss function.
  \end{block}

  \vspace{0.3cm}

  \textbf{Key Steps:}
  \begin{enumerate}
    \item \textbf{Forward Pass:} Compute predictions
    \item \textbf{Compute Loss:} Measure error
    \item \textbf{Backward Pass:} Compute gradients
    \item \textbf{Update Weights:} Gradient descent
  \end{enumerate}

  \vspace{0.3cm}

  \begin{exampleblock}{Weight Update Rule}
    % "new"/"old" are word labels, not products of variables: set them upright
    \begin{equation*}
      w_{\mathrm{new}} = w_{\mathrm{old}} - \eta \frac{\partial L}{\partial w}
    \end{equation*}
    where $\eta$ is the learning rate and $L$ is the loss function.
  \end{exampleblock}
\end{frame}
228
% ==================== Section 3 ====================
\section{Deep Learning Architectures}

\begin{frame}{Convolutional Neural Networks (CNNs)}
  \begin{columns}[c]
    \column{0.5\textwidth}
    \textbf{Key Components:}
    \begin{itemize}
      \item Convolutional layers
      \item Pooling layers
      \item Fully connected layers
    \end{itemize}

    \vspace{0.3cm}

    \textbf{Advantages:}
    \begin{itemize}
      \item Translation invariance
      \item Parameter sharing
      \item Hierarchical feature learning
    \end{itemize}

    \column{0.5\textwidth}
    \begin{center}
      \textit{[CNN Architecture Diagram]}

      \vspace{0.2cm}

      \framebox{\parbox{4cm}{
        Input Image\\
        $\downarrow$\\
        Convolution + ReLU\\
        $\downarrow$\\
        Pooling\\
        $\downarrow$\\
        Fully Connected\\
        $\downarrow$\\
        Output
      }}
    \end{center}
  \end{columns}

  \vspace{0.5cm}

  \begin{alertblock}{Applications}
    Image classification, object detection, image segmentation, face recognition
  \end{alertblock}
\end{frame}
277
\begin{frame}{Recurrent Neural Networks (RNNs)}
  \textbf{Designed for sequential data processing}

  \vspace{0.3cm}

  \begin{columns}[t]
    \column{0.5\textwidth}
    \textbf{Standard RNN Issues:}
    \begin{itemize}
      \item Vanishing gradients
      \item Difficulty learning long-term dependencies
      \item Limited memory
    \end{itemize}

    \vspace{0.3cm}

    \textbf{Solutions:}
    \begin{itemize}
      \item LSTM (Long Short-Term Memory)
      \item GRU (Gated Recurrent Unit)
    \end{itemize}

    \column{0.5\textwidth}
    \textbf{LSTM Advantages:}
    \begin{itemize}
      \item Cell state for long-term memory
      \item Gating mechanisms
      \item Better gradient flow
    \end{itemize}

    \vspace{0.3cm}

    \textbf{Applications:}
    \begin{itemize}
      \item Language modeling
      \item Machine translation
      \item Speech recognition
      \item Time series prediction
    \end{itemize}
  \end{columns}
\end{frame}
319
\begin{frame}{Transformers}
  \begin{block}{Revolutionary Architecture (2017)}
    ``Attention is All You Need'' introduced the Transformer, which replaced
    recurrence with self-attention mechanisms.
  \end{block}

  \vspace{0.3cm}

  \begin{columns}[c]
    \column{0.5\textwidth}
    \textbf{Key Innovations:}
    \begin{itemize}
      \item Self-attention mechanism
      \item Positional encoding
      \item Parallel processing
      \item Multi-head attention
    \end{itemize}

    \column{0.5\textwidth}
    \textbf{Impact:}
    \begin{itemize}
      \item BERT (2018)
      \item GPT series (2018-)
      \item Vision Transformers (2020)
      \item Foundation for modern LLMs
    \end{itemize}
  \end{columns}

  \vspace{0.3cm}

  \begin{exampleblock}{Self-Attention Formula}
    \begin{equation*}
      \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{QK^T}{\sqrt{d_k}}\right)V
    \end{equation*}
  \end{exampleblock}
\end{frame}
356
% ==================== Section 4 ====================
\section{Practical Considerations}

% [fragile] removed: the frame contains no verbatim material, so it is unnecessary
\begin{frame}{Training Best Practices}
  \begin{columns}[t]
    \column{0.5\textwidth}
    \textbf{Data Preparation:}
    \begin{itemize}
      \item Data augmentation
      \item Normalization
      \item Train/validation/test split
      \item Handling imbalanced datasets
    \end{itemize}

    \vspace{0.3cm}

    \textbf{Hyperparameters:}
    \begin{itemize}
      \item Learning rate
      \item Batch size
      \item Number of epochs
      \item Network architecture
    \end{itemize}

    \column{0.5\textwidth}
    \textbf{Regularization:}
    \begin{itemize}
      \item Dropout
      \item L1/L2 regularization
      \item Batch normalization
      \item Early stopping
    \end{itemize}

    \vspace{0.3cm}

    \textbf{Optimization:}
    \begin{itemize}
      \item Adam optimizer
      \item Learning rate scheduling
      \item Gradient clipping
      \item Mixed precision training
    \end{itemize}
  \end{columns}
\end{frame}
401
% [fragile] is required here: the frame body contains a verbatim lstlisting
\begin{frame}[fragile]{PyTorch Example}
  \begin{lstlisting}[language=Python]
import torch
import torch.nn as nn

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
  \end{lstlisting}
\end{frame}
423
% ==================== Section 5 ====================
\section{Conclusion}

\begin{frame}{Summary}
  \begin{block}{What We Covered}
    \begin{itemize}
      \item Fundamentals of neural networks
      \item Major deep learning architectures (CNN, RNN, Transformer)
      \item Training algorithms and best practices
      \item Practical implementation considerations
    \end{itemize}
  \end{block}

  \vspace{0.5cm}

  \begin{exampleblock}{Key Takeaways}
    \begin{enumerate}
      \item Deep learning has revolutionized AI
      \item Architecture choice depends on the problem domain
      \item Proper training requires careful hyperparameter tuning
      \item Transformers are the current state-of-the-art for many tasks
    \end{enumerate}
  \end{exampleblock}
\end{frame}
448
\begin{frame}{Future Directions}
  \begin{columns}[c]
    \column{0.5\textwidth}
    \textbf{Research Trends:}
    \begin{itemize}
      \item Scaling laws
      \item Multimodal models
      \item Efficient architectures
      \item Neural architecture search
      \item Explainable AI
    \end{itemize}

    \column{0.5\textwidth}
    \textbf{Challenges:}
    \begin{itemize}
      \item Computational costs
      \item Data requirements
      \item Bias and fairness
      \item Robustness
      \item Interpretability
    \end{itemize}
  \end{columns}

  \vspace{0.5cm}

  \begin{alertblock}{The Future is Bright}
    Deep learning continues to advance rapidly, with new breakthroughs
    happening regularly. Stay curious and keep learning!
  \end{alertblock}
\end{frame}
479
% NOTE: [standout] is a metropolis-theme frame option; under the Madrid theme it
% is an unknown key and aborts compilation. A plain, centered frame gives the
% intended closing-slide look with any theme.
\begin{frame}[plain]
  \centering
  {\Huge Thank You!\par}

  \vspace{1cm}

  {\Large Questions?\par}

  \vspace{1cm}

  % beamer loads hyperref, so \href produces a clickable mailto link
  Contact: \href{mailto:john.smith@mit.edu}{john.smith@mit.edu}
\end{frame}
492
% ==================== Backup Slides ====================
\appendix

\begin{frame}{Additional Resources}
  \textbf{Books:}
  \begin{itemize}
    \item Deep Learning (Goodfellow, Bengio, Courville)
    \item Neural Networks and Deep Learning (Nielsen)
    \item Dive into Deep Learning (Zhang et al.)
  \end{itemize}

  \vspace{0.3cm}

  \textbf{Online Courses:}
  \begin{itemize}
    \item Stanford CS231n (Computer Vision)
    \item Stanford CS224n (NLP)
    \item fast.ai Practical Deep Learning
  \end{itemize}

  \vspace{0.3cm}

  \textbf{Frameworks:}
  \begin{itemize}
    \item PyTorch: \url{https://pytorch.org}
    \item TensorFlow: \url{https://tensorflow.org}
  \end{itemize}
\end{frame}

\end{document}