08_time_series.py

Download
python 452 lines 13.5 KB
  1"""
  208_time_series.py
  3
  4Demonstrates time series analysis techniques:
  5- Moving average
  6- Exponential smoothing
  7- Autocorrelation
  8- Stationarity (ADF test concept)
  9- AR/MA/ARMA models
 10- Simple forecasting
 11"""
 12
 13import numpy as np
 14from scipy import stats
 15
 16try:
 17    import matplotlib.pyplot as plt
 18    HAS_PLT = True
 19except ImportError:
 20    HAS_PLT = False
 21    print("matplotlib not available; skipping plots\n")
 22
 23
 24def print_section(title):
 25    """Print formatted section header."""
 26    print("\n" + "=" * 70)
 27    print(f"  {title}")
 28    print("=" * 70)
 29
 30
 31def moving_average():
 32    """Demonstrate moving average smoothing."""
 33    print_section("1. Moving Average")
 34
 35    # Generate time series with trend and noise
 36    np.random.seed(42)
 37    n = 200
 38    t = np.arange(n)
 39    trend = 0.5 * t
 40    seasonal = 10 * np.sin(2 * np.pi * t / 20)
 41    noise = np.random.normal(0, 5, n)
 42    y = trend + seasonal + noise
 43
 44    print(f"Generated time series: n = {n}")
 45    print(f"Components: trend + seasonal + noise")
 46
 47    # Different window sizes
 48    windows = [5, 10, 20]
 49
 50    print(f"\nMoving averages with different windows:")
 51
 52    ma_results = {}
 53    for window in windows:
 54        # Simple moving average
 55        ma = np.convolve(y, np.ones(window) / window, mode='valid')
 56        ma_results[window] = ma
 57
 58        # MSE compared to trend+seasonal (without noise)
 59        true_signal = (trend + seasonal)[window-1:]
 60        mse = np.mean((ma - true_signal)**2)
 61
 62        print(f"\n  Window size {window}:")
 63        print(f"    Output length: {len(ma)}")
 64        print(f"    MSE (vs true signal): {mse:.2f}")
 65        print(f"    Mean: {np.mean(ma):.2f}")
 66        print(f"    Std: {np.std(ma, ddof=1):.2f}")
 67
 68    if HAS_PLT:
 69        plt.figure(figsize=(12, 8))
 70
 71        plt.subplot(2, 1, 1)
 72        plt.plot(t, y, 'gray', alpha=0.5, label='Original')
 73        plt.plot(t, trend + seasonal, 'black', linewidth=2, label='True signal')
 74        plt.xlabel('Time')
 75        plt.ylabel('Value')
 76        plt.title('Original Time Series')
 77        plt.legend()
 78        plt.grid(True, alpha=0.3)
 79
 80        plt.subplot(2, 1, 2)
 81        for window in windows:
 82            ma = ma_results[window]
 83            t_ma = t[window-1:]
 84            plt.plot(t_ma, ma, label=f'MA({window})', linewidth=2)
 85        plt.xlabel('Time')
 86        plt.ylabel('Value')
 87        plt.title('Moving Averages')
 88        plt.legend()
 89        plt.grid(True, alpha=0.3)
 90
 91        plt.tight_layout()
 92        plt.savefig('/tmp/moving_average.png', dpi=100)
 93        print("\n[Plot saved to /tmp/moving_average.png]")
 94        plt.close()
 95
 96
 97def exponential_smoothing():
 98    """Demonstrate exponential smoothing."""
 99    print_section("2. Exponential Smoothing")
100
101    # Generate time series
102    np.random.seed(123)
103    n = 150
104    t = np.arange(n)
105    level = 50 + 0.3 * t
106    noise = np.random.normal(0, 3, n)
107    y = level + noise
108
109    print(f"Generated time series: n = {n}")
110
111    # Different smoothing parameters
112    alphas = [0.1, 0.3, 0.7, 0.9]
113
114    print(f"\nExponential smoothing with different α:")
115
116    es_results = {}
117    for alpha in alphas:
118        # Simple exponential smoothing
119        smoothed = np.zeros(n)
120        smoothed[0] = y[0]
121
122        for i in range(1, n):
123            smoothed[i] = alpha * y[i] + (1 - alpha) * smoothed[i-1]
124
125        es_results[alpha] = smoothed
126
127        # MSE
128        mse = np.mean((smoothed - level)**2)
129
130        print(f"\n  α = {alpha}:")
131        print(f"    MSE: {mse:.2f}")
132        print(f"    Final value: {smoothed[-1]:.2f}")
133
134        # Lag (measure of responsiveness)
135        lag = np.mean(level - smoothed)
136        print(f"    Average lag: {lag:.2f}")
137
138    print(f"\nSmaller α: more smoothing, less responsive")
139    print(f"Larger α: less smoothing, more responsive")
140
141    if HAS_PLT:
142        plt.figure(figsize=(12, 6))
143        plt.plot(t, y, 'gray', alpha=0.4, label='Original', linewidth=1)
144        plt.plot(t, level, 'black', linestyle='--', linewidth=2, label='True level')
145
146        for alpha in alphas:
147            plt.plot(t, es_results[alpha], label=f'α={alpha}', linewidth=2)
148
149        plt.xlabel('Time')
150        plt.ylabel('Value')
151        plt.title('Exponential Smoothing')
152        plt.legend()
153        plt.grid(True, alpha=0.3)
154        plt.savefig('/tmp/exponential_smoothing.png', dpi=100)
155        print("\n[Plot saved to /tmp/exponential_smoothing.png]")
156        plt.close()
157
158
159def autocorrelation_analysis():
160    """Demonstrate autocorrelation analysis."""
161    print_section("3. Autocorrelation Analysis")
162
163    # Generate AR(1) process
164    np.random.seed(456)
165    n = 300
166    phi = 0.7
167    y = np.zeros(n)
168    y[0] = np.random.normal(0, 1)
169
170    for t in range(1, n):
171        y[t] = phi * y[t-1] + np.random.normal(0, 1)
172
173    print(f"Generated AR(1) process: y_t = {phi}*y_{{t-1}} + ε_t")
174    print(f"Sample size: {n}")
175
176    # Calculate autocorrelation function (ACF)
177    max_lag = 20
178    acf = np.zeros(max_lag + 1)
179
180    y_mean = np.mean(y)
181    var = np.var(y, ddof=1)
182
183    for lag in range(max_lag + 1):
184        if lag == 0:
185            acf[lag] = 1.0
186        else:
187            cov = np.mean((y[:-lag] - y_mean) * (y[lag:] - y_mean))
188            acf[lag] = cov / var
189
190    print(f"\nAutocorrelation function (ACF):")
191    print(f"  Lag    ACF    Theoretical")
192    for lag in range(min(11, max_lag + 1)):
193        theoretical = phi**lag  # For AR(1)
194        print(f"  {lag:3d}  {acf[lag]:6.3f}    {theoretical:6.3f}")
195
196    # Ljung-Box test for autocorrelation
197    # Test statistic Q = n(n+2) * sum((ρ_k)^2 / (n-k))
198    h = 10  # Test up to lag h
199    Q = n * (n + 2) * np.sum(acf[1:h+1]**2 / (n - np.arange(1, h + 1)))
200    p_value = 1 - stats.chi2.cdf(Q, h)
201
202    print(f"\nLjung-Box test (H₀: no autocorrelation up to lag {h}):")
203    print(f"  Q statistic: {Q:.4f}")
204    print(f"  p-value: {p_value:.6f}")
205    if p_value < 0.05:
206        print(f"  Significant autocorrelation detected")
207
208    if HAS_PLT:
209        fig, axes = plt.subplots(2, 1, figsize=(12, 8))
210
211        # Time series
212        axes[0].plot(y, linewidth=1)
213        axes[0].set_xlabel('Time')
214        axes[0].set_ylabel('Value')
215        axes[0].set_title(f'AR(1) Process (φ={phi})')
216        axes[0].grid(True, alpha=0.3)
217
218        # ACF
219        axes[1].bar(range(max_lag + 1), acf, alpha=0.7)
220        axes[1].plot(range(max_lag + 1), [phi**lag for lag in range(max_lag + 1)],
221                    'r--', linewidth=2, label='Theoretical')
222        # Confidence bands (approximate)
223        conf_level = 1.96 / np.sqrt(n)
224        axes[1].axhline(conf_level, color='blue', linestyle='--', alpha=0.5)
225        axes[1].axhline(-conf_level, color='blue', linestyle='--', alpha=0.5)
226        axes[1].set_xlabel('Lag')
227        axes[1].set_ylabel('ACF')
228        axes[1].set_title('Autocorrelation Function')
229        axes[1].legend()
230        axes[1].grid(True, alpha=0.3)
231
232        plt.tight_layout()
233        plt.savefig('/tmp/autocorrelation.png', dpi=100)
234        print("\n[Plot saved to /tmp/autocorrelation.png]")
235        plt.close()
236
237
238def stationarity_test():
239    """Demonstrate stationarity testing concept."""
240    print_section("4. Stationarity Testing")
241
242    print("Comparing stationary vs non-stationary series\n")
243
244    np.random.seed(789)
245    n = 200
246
247    # Stationary: AR(1) with |phi| < 1
248    phi = 0.6
249    y_stationary = np.zeros(n)
250    for t in range(1, n):
251        y_stationary[t] = phi * y_stationary[t-1] + np.random.normal(0, 1)
252
253    # Non-stationary: Random walk
254    y_rw = np.cumsum(np.random.normal(0, 1, n))
255
256    print("Series 1: AR(1) with φ=0.6 (stationary)")
257    print(f"  Mean: {np.mean(y_stationary):.2f}")
258    print(f"  Variance: {np.var(y_stationary, ddof=1):.2f}")
259
260    print("\nSeries 2: Random walk (non-stationary)")
261    print(f"  Mean: {np.mean(y_rw):.2f}")
262    print(f"  Variance: {np.var(y_rw, ddof=1):.2f}")
263
264    # Simple stationarity check: variance in first vs second half
265    n_half = n // 2
266
267    var1_stat = np.var(y_stationary[:n_half], ddof=1)
268    var2_stat = np.var(y_stationary[n_half:], ddof=1)
269
270    var1_rw = np.var(y_rw[:n_half], ddof=1)
271    var2_rw = np.var(y_rw[n_half:], ddof=1)
272
273    print(f"\nVariance comparison (first half vs second half):")
274    print(f"  Stationary series: {var1_stat:.2f} vs {var2_stat:.2f} (ratio: {var2_stat/var1_stat:.2f})")
275    print(f"  Random walk: {var1_rw:.2f} vs {var2_rw:.2f} (ratio: {var2_rw/var1_rw:.2f})")
276
277    print(f"\nStationary series: variance stable")
278    print(f"Random walk: variance increases over time")
279
280    # Differencing the random walk
281    y_rw_diff = np.diff(y_rw)
282
283    print(f"\nDifferenced random walk:")
284    print(f"  Variance: {np.var(y_rw_diff, ddof=1):.2f}")
285    print(f"  (Differencing makes random walk stationary)")
286
287
288def ar_ma_models():
289    """Demonstrate AR, MA, and ARMA models."""
290    print_section("5. AR, MA, and ARMA Models")
291
292    np.random.seed(111)
293    n = 300
294
295    # AR(1) model: y_t = 0.7*y_{t-1} + ε_t
296    print("AR(1) model: y_t = 0.7*y_{t-1} + ε_t")
297    phi = 0.7
298    y_ar = np.zeros(n)
299    eps = np.random.normal(0, 1, n)
300    for t in range(1, n):
301        y_ar[t] = phi * y_ar[t-1] + eps[t]
302
303    print(f"  Mean: {np.mean(y_ar):.2f}")
304    print(f"  Variance: {np.var(y_ar, ddof=1):.2f}")
305    print(f"  Theoretical variance: {1/(1-phi**2):.2f}")
306
307    # MA(1) model: y_t = ε_t + 0.6*ε_{t-1}
308    print(f"\nMA(1) model: y_t = ε_t + 0.6*ε_{{t-1}}")
309    theta = 0.6
310    y_ma = np.zeros(n)
311    for t in range(1, n):
312        y_ma[t] = eps[t] + theta * eps[t-1]
313
314    print(f"  Mean: {np.mean(y_ma):.2f}")
315    print(f"  Variance: {np.var(y_ma, ddof=1):.2f}")
316    print(f"  Theoretical variance: {1 + theta**2:.2f}")
317
318    # ARMA(1,1) model: y_t = 0.5*y_{t-1} + ε_t + 0.4*ε_{t-1}
319    print(f"\nARMA(1,1) model: y_t = 0.5*y_{{t-1}} + ε_t + 0.4*ε_{{t-1}}")
320    phi_arma = 0.5
321    theta_arma = 0.4
322    y_arma = np.zeros(n)
323    for t in range(1, n):
324        y_arma[t] = phi_arma * y_arma[t-1] + eps[t] + theta_arma * eps[t-1]
325
326    print(f"  Mean: {np.mean(y_arma):.2f}")
327    print(f"  Variance: {np.var(y_arma, ddof=1):.2f}")
328
329    if HAS_PLT:
330        fig, axes = plt.subplots(3, 1, figsize=(12, 10))
331
332        axes[0].plot(y_ar, linewidth=1)
333        axes[0].set_ylabel('Value')
334        axes[0].set_title(f'AR(1): y_t = {phi}*y_{{t-1}} + ε_t')
335        axes[0].grid(True, alpha=0.3)
336
337        axes[1].plot(y_ma, linewidth=1, color='orange')
338        axes[1].set_ylabel('Value')
339        axes[1].set_title(f'MA(1): y_t = ε_t + {theta}*ε_{{t-1}}')
340        axes[1].grid(True, alpha=0.3)
341
342        axes[2].plot(y_arma, linewidth=1, color='green')
343        axes[2].set_xlabel('Time')
344        axes[2].set_ylabel('Value')
345        axes[2].set_title(f'ARMA(1,1): y_t = {phi_arma}*y_{{t-1}} + ε_t + {theta_arma}*ε_{{t-1}}')
346        axes[2].grid(True, alpha=0.3)
347
348        plt.tight_layout()
349        plt.savefig('/tmp/ar_ma_models.png', dpi=100)
350        print("\n[Plot saved to /tmp/ar_ma_models.png]")
351        plt.close()
352
353
354def simple_forecasting():
355    """Demonstrate simple forecasting methods."""
356    print_section("6. Simple Forecasting")
357
358    # Generate data
359    np.random.seed(222)
360    n_train = 100
361    n_test = 20
362    n_total = n_train + n_test
363
364    # AR(1) process
365    phi = 0.8
366    y = np.zeros(n_total)
367    for t in range(1, n_total):
368        y[t] = phi * y[t-1] + np.random.normal(0, 1)
369
370    y_train = y[:n_train]
371    y_test = y[n_train:]
372
373    print(f"Training data: {n_train} observations")
374    print(f"Test data: {n_test} observations")
375
376    # Method 1: Mean forecast
377    forecast_mean = np.full(n_test, np.mean(y_train))
378
379    # Method 2: Last value (naive forecast)
380    forecast_naive = np.full(n_test, y_train[-1])
381
382    # Method 3: AR(1) forecast (assuming phi is estimated)
383    phi_hat = np.corrcoef(y_train[:-1], y_train[1:])[0, 1]
384    forecast_ar = np.zeros(n_test)
385    forecast_ar[0] = phi_hat * y_train[-1]
386    for h in range(1, n_test):
387        forecast_ar[h] = phi_hat * forecast_ar[h-1]
388
389    print(f"\nEstimated φ: {phi_hat:.4f} (true: {phi})")
390
391    # Calculate forecast errors
392    methods = [
393        ("Mean", forecast_mean),
394        ("Naive", forecast_naive),
395        ("AR(1)", forecast_ar)
396    ]
397
398    print(f"\nForecast performance:")
399    print(f"  {'Method':<10} {'MAE':>8} {'RMSE':>8}")
400    print("-" * 30)
401
402    for name, forecast in methods:
403        mae = np.mean(np.abs(y_test - forecast))
404        rmse = np.sqrt(np.mean((y_test - forecast)**2))
405        print(f"  {name:<10} {mae:>8.3f} {rmse:>8.3f}")
406
407    if HAS_PLT:
408        plt.figure(figsize=(12, 6))
409
410        t_train = np.arange(n_train)
411        t_test = np.arange(n_train, n_total)
412
413        plt.plot(t_train, y_train, 'b-', linewidth=2, label='Training data')
414        plt.plot(t_test, y_test, 'black', linewidth=2, marker='o', label='Test data')
415        plt.plot(t_test, forecast_mean, 'g--', linewidth=2, label='Mean forecast')
416        plt.plot(t_test, forecast_naive, 'orange', linestyle='--', linewidth=2, label='Naive forecast')
417        plt.plot(t_test, forecast_ar, 'r--', linewidth=2, label='AR(1) forecast')
418
419        plt.axvline(n_train - 0.5, color='gray', linestyle=':', linewidth=2)
420        plt.xlabel('Time')
421        plt.ylabel('Value')
422        plt.title('Time Series Forecasting')
423        plt.legend()
424        plt.grid(True, alpha=0.3)
425        plt.savefig('/tmp/forecasting.png', dpi=100)
426        print("\n[Plot saved to /tmp/forecasting.png]")
427        plt.close()
428
429
430def main():
431    """Run all demonstrations."""
432    print("=" * 70)
433    print("  TIME SERIES ANALYSIS DEMONSTRATIONS")
434    print("=" * 70)
435
436    np.random.seed(42)
437
438    moving_average()
439    exponential_smoothing()
440    autocorrelation_analysis()
441    stationarity_test()
442    ar_ma_models()
443    simple_forecasting()
444
445    print("\n" + "=" * 70)
446    print("  All demonstrations completed successfully!")
447    print("=" * 70)
448
449
450if __name__ == "__main__":
451    main()