Wednesday, August 2, 2017

Python stationarity check using Dickey-Fuller test ACF and PACF plots

Data is available here...
import datetime as datetime  
import pandas as pd  
import numpy as np
import statsmodels.api as sm  
import seaborn as sns  
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

# Import the sample streamflow dataset
df=pd.read_csv(r'F:\Python\TimeSeries\USGS-Monthly_Streamflow_Bend_OR.txt', sep='\t',
               header=0, low_memory=False, encoding='latin-1')

# The yyyy,mm, and dd are in seperate columns, we need to make this a single column
df['dti'] = pd.to_datetime(df['year_nu'].astype(str)+'-'+df['month_nu'].astype(str)+'-'+df['dd_nu'].astype(str))

# Let use this as our index since we are using pandas
df.index = pd.DatetimeIndex(df['dti'])  

# Clean the dataframe a bit
df = df.drop(['dd_nu','year_nu','month_nu','dti'], axis=1)  

def test_stationarity(timeseries, window, figsize=(10,7)):
    
    #Determing rolling statistics
    rolmean = timeseries.rolling(window=window).mean()
    rolstd = timeseries.rolling(window=window).std()

    #Plot rolling statistics:
    fig, ax = plt.subplots(figsize=figsize, dpi=100)
    orig = plt.plot(timeseries, color='blue',label='Original',linewidth=2)
    mean = plt.plot(rolmean, color='red', label='Rolling Mean',linewidth=2)
    std = plt.plot(rolstd, color='black', label = 'Rolling Std',linewidth=2)
    plt.legend(loc='best', prop={'size': 8})
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print(dfoutput)
    
test_stationarity(timeseries=df['mean_va'], window=12, figsize=(10,5))

#ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf

acf_ = acf(df['mean_va'], nlags=60)
pacf_ = pacf(df['mean_va'], nlags=60, method='ols')

#Plot ACF: 
plt.figure(figsize=(12,4), dpi=80)
plt.subplot(121) 
plt.plot(np.arange(len(acf_)), acf_,linewidth=2)
plt.xticks(np.arange(len(acf_)), fontsize=5)
plt.axhline(y=0,linestyle='--',linewidth=2,color='gray')
plt.axhline(y=-1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray')
plt.axhline(y=1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray')
plt.title('Autocorrelation Function', fontsize=20)

#Plot PACF:
plt.subplot(122)
plt.plot(np.arange(len(pacf_)), pacf_,linewidth=2)
plt.xticks(np.arange(len(pacf_)), fontsize=5)
plt.axhline(y=0,linestyle='--',linewidth=2,color='gray')
plt.axhline(y=-1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray')
plt.axhline(y=1.96/np.sqrt(len(df['mean_va'])),linestyle='--',linewidth=2,color='gray')
plt.title('Partial Autocorrelation Function', fontsize=20)
plt.tight_layout()

#Combined Model ARIMA Model
from statsmodels.tsa.arima_model import ARIMA

# fit model
model = ARIMA(df['mean_va'], order=(3,1,0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# plot residual errors
residuals = pd.DataFrame(model_fit.resid)
residuals.plot(linewidth=2)
plt.show()
residuals.plot(kind='kde',linewidth=3)
plt.show()
print(residuals.describe())

df['mean_va_fittedvalues'] = model_fit.fittedvalues



No comments:

Post a Comment