Data is available here...
import datetime as datetime import pandas as pd import numpy as np import statsmodels.api as sm import seaborn as sns import matplotlib.pyplot as plt from statsmodels.tsa.seasonal import seasonal_decompose from statsmodels.tsa.stattools import adfuller # Import the sample streamflow dataset df=pd.read_csv(r'F:\Python\TimeSeries\USGS-Monthly_Streamflow_Bend_OR.txt', sep='\t', header=0, low_memory=False, encoding='latin-1') # The yyyy,mm, and dd are in seperate columns, we need to make this a single column df['dti'] = pd.to_datetime(df['year_nu'].astype(str)+'-'+df['month_nu'].astype(str)+'-'+df['dd_nu'].astype(str)) # Let use this as our index since we are using pandas df.index = pd.DatetimeIndex(df['dti']) # Clean the dataframe a bit df = df.drop(['dd_nu','year_nu','month_nu','dti'], axis=1) def test_stationarity(timeseries, window, figsize=(10,7)): #Determing rolling statistics rolmean = timeseries.rolling(window=window).mean() rolstd = timeseries.rolling(window=window).std() #Plot rolling statistics: fig, ax = plt.subplots(figsize=figsize, dpi=100) orig = plt.plot(timeseries, color='blue',label='Original',linewidth=2) mean = plt.plot(rolmean, color='red', label='Rolling Mean',linewidth=2) std = plt.plot(rolstd, color='black', label = 'Rolling Std',linewidth=2) plt.legend(loc='best', prop={'size': 8}) plt.title('Rolling Mean & Standard Deviation') plt.show(block=False) #Perform Dickey-Fuller test: print('Results of Dickey-Fuller Test:') dftest = adfuller(timeseries, autolag='AIC') dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) for key,value in dftest[4].items(): dfoutput['Critical Value (%s)'%key] = value print(dfoutput) test_stationarity(timeseries=df['mean_va'], window=12, figsize=(10,5)) #ACF and PACF plots: from statsmodels.tsa.stattools import acf, pacf acf_ = acf(df['mean_va'], nlags=60) pacf_ = pacf(df['mean_va'], nlags=60, method='ols') #Plot ACF: plt.figure(figsize=(12,4), dpi=80) plt.subplot(121) plt.plot(np.arange(len(acf_)), acf_,linewidth=2) plt.xticks(np.arange(len(acf_)), fontsize=5) plt.axhline(y=0,linestyle='--',linewidth=2,color='gray') plt.axhline(y=-1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray') plt.axhline(y=1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray') plt.title('Autocorrelation Function', fontsize=20) #Plot PACF: plt.subplot(122) plt.plot(np.arange(len(pacf_)), pacf_,linewidth=2) plt.xticks(np.arange(len(pacf_)), fontsize=5) plt.axhline(y=0,linestyle='--',linewidth=2,color='gray') plt.axhline(y=-1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray') plt.axhline(y=1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray') plt.title('Partial Autocorrelation Function', fontsize=20) plt.tight_layout() #Combined Model ARIMA Model from statsmodels.tsa.arima_model import ARIMA # fit model model = ARIMA(df['mean_va'], order=(3,1,0)) model_fit = model.fit(disp=0) print(model_fit.summary()) # plot residual errors residuals = pd.DataFrame(model_fit.resid) residuals.plot(linewidth=2) plt.show() residuals.plot(kind='kde',linewidth=3) plt.show() print(residuals.describe()) df['mean_va_fittedvalues'] = model_fit.fittedvalues
No comments:
Post a Comment