!wget ftp://aftp.cmdl.noaa.gov/data/trace_gases/co2/in-situ/surface/mlo/co2_mlo_surface-insitu_1_ccgg_month.txt
!mv co2_mlo_surface-insitu_1_ccgg_month.txt mlo_co2.txt 

import numpy as np
co2 = np.loadtxt("mlo_co2.txt", comments="#", usecols=(3,), unpack=False)[4:] #skiprows=4, 
yearmonth = np.loadtxt("mlo_co2.txt", comments="#", usecols=(1,2), dtype=int, unpack=True)[:,4:]  #skiprows=4, 

print co2.shape, yearmonth.shape

import datetime as DT
#assume all days are the first day of month
days = np.ones_like(co2, dtype=int)
#year,doy = datetime.datetime(data[4:,0], data[4:,1], days).strftime('%Y %j').split()
#months = data[4:,1]

dates = []

for i in np.arange(len(days)):
    dates.append(DT.datetime(yearmonth[0][i], yearmonth[1][i], days[i]))
    
print "Mauna Loa CO2 serie starts from %s, and ends at %s"%(dates[0], dates[-1])

plt.figure(figsize=(20,5)) #make figure wider
plt.plot(dates,co2)
Alternatively, we can use relativedelta to generatoe a series of dates.
Sample code:
    firstday = 
    numdays = 
    dateList = [ firstday + relativedelta(days=x) for x in range(0,numdays) ]
import scipy.stats as stats

index = np.arange(len(co2))

co2_lin = stats.linregress(index, co2)
co2_predict = co2_lin[0] * index + co2_lin[1]

#figure(figsize=(20,5))
plot(dates, co2)
plot(dates, co2_predict)
title('Mauna Loa CO2 Concentation')
xlabel('Year')
ylabel('CO2 (ppm)')
figtext(0.15, 0.8, 'r = '+str(co2_lin[2]), size='x-large')
figtext(0.15, 0.75, 'intercept = '+str(co2_lin[1]), size='large')
figtext(0.15, 0.7, 'slope = '+str(co2_lin[0]), size='large')

#calculate residuals
co2_lin_redis = co2 - co2_predict
figure(figsize=(20,5))
plot(dates, co2_lin_redis)

import statsmodels.api as sm
tsa = sm.tsa
import statsmodels.formula.api as smf
index_const = sm.add_constant(index)
co2_model = smf.GLM(co2, index_const).fit()
print co2_model.summary()
co2_pr = co2_model.predict()
#plot(dates, co2)
#plot(dates, co2_pr)

import statsmodels.api as sm
tsa = sm.tsa
help(tsa.ARMA)

#Testing ARMA(1,1) for Original Series

arma_mod = tsa.ARMA(co2, order=(1,1))
arma_res = arma_mod.fit(trend='c', disp=-1)
arma_pred = arma_res.predict()

print arma_res.params
print "AIC = %f, BIC = %f"%(arma_res.aic,arma_res.bic)

plt.figure(figsize=(20,5))
plot(dates,arma_pred)
plot(dates,co2)

from statsmodels.tsa.arima_model import ARIMA

def Plot_ARIMA(p,d,q):
    arima = ARIMA(co2, [p, d, q], exog=None, dates=dates, freq='M', missing='none')
    arima_results = arima.fit(trend='c', disp=False)
    co2_ARIMA = arima_results.predict(exog=None, dynamic=False)
    
    if d>0:
        co2_d = co2[d:]-co2[:len(co2)-d]
    else:
        co2_d = co2

    plt.figure(figsize=(20,5))
    plt.plot(dates[d:],co2_d)
    plt.plot(dates[d:],co2_ARIMA)
    return arima_results.aic

aic_001 = Plot_ARIMA(0,0,1)   #MA(1), OR ARMA(0,1)

aic_100 = Plot_ARIMA(1,0,0)   #AR(1), or ARMA(1,0)

aic_101 = Plot_ARIMA(1,0,1)   # ARMA(1,1)

aic_001, aic_100, aic_101

acf, ci, Q, pvalue = tsa.acf(co2, nlags=24, alpha=0.05, qstat=True, unbiased=True)
subplot(211)
title('ACF')
plot(acf,'b^')

pacf, cip = tsa.pacf(co2, nlags=24, alpha=0.05)
subplot(212)
title('PACF')
plot(pacf,'b^')
vlines(np.arange(len(pacf)),[-0.4],pacf,'b')
#tight_layout()

# diff by 1 month
# Differencing by one month forces us to drop the first (or last) 1 value. 
df1_co2 = (co2 - np.roll(co2,1))[1:]
plt.figure(figsize=(20,5))
plt.plot(dates[1:],df1_co2)
#df1_log_co2 = (log_co2 - np.roll(log_co2,1))[1:]
#plt.plot(df1_log_co2)

acf, ci, Q, pvalue = tsa.acf(df1_co2, nlags=24, alpha=0.05, qstat=True, unbiased=True)
subplot(211)
plot(acf,'b^')

pacf, cip = tsa.pacf(df1_co2, nlags=24, alpha=0.05)
subplot(212)
plot(pacf,'b^')
vlines(np.arange(len(pacf)),[-0.6],pacf,'b')

# Differencing by the 12 months forces us to drop the first 12 values. 
df12_co2 = (co2 - np.roll(co2,12))[12:]
df12_dates = dates[12:]

p = 1
d = 0
q = 1

arima = ARIMA(df12_co2, [p, d, q], exog=None, dates=df12_dates, freq='M', missing='none')
df12_arima_results = arima.fit(trend='c', disp=False)
predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False)

predicted_df12_arima_co2 = np.roll(co2,12)[12+d:] + np.roll(df12_co2,d)[d:] + predicted_df12_arima

plt.figure(figsize=(20,5))
plt.plot(df12_dates,df12_co2) 
plt.plot(df12_dates,predicted_df12_arima) # predicted 

df12_arima_results.summary(alpha=0.05)

# International airline passengers: monthly totals in thousands. 
# from Jan 1949 to Dec 1960
air = [112,118,132,129,121,135,148,148,136,119,104,118,\
       115,126,141,135,125,149,170,170,158,133,114,140,\
    145,150,178,163,172,178,199,199,184,162,146,166,\
    171,180,193,181,183,218,230,242,209,191,172,194,\
    196,196,236,235,229,243,264,272,237,211,180,201,\
    204,188,235,227,234,264,302,293,259,229,203,229,\
    242,233,267,269,270,315,364,347,312,274,237,278,\
    284,277,317,313,318,374,413,405,355,306,271,306,\
    315,301,356,348,355,422,465,467,404,347,305,336,\
    340,318,362,348,363,435,491,505,404,359,310,337,\
    360,342,406,396,420,472,548,559,463,407,362,405,\
    417,391,419,461,472,535,622,606,508,461,390,432]

plot(air)

#log transformation
log_air = np.log(air)
subplot(211)
plot(log_air)
     
#1st order differencing 
df1_log_air = log_air[1:] -log_air[:-1]
subplot(212)
plot(df1_log_air)

def plot_acf_pacf(data):
    acf, ci, Q, pvalue = tsa.acf(data, nlags=24, alpha=0.05, qstat=True, unbiased=True)
    subplot(211)
    plot(acf,'b^')
    
    pacf, cip = tsa.pacf(data, nlags=24, alpha=0.05)
    subplot(212)
    plot(pacf,'b^')
    vlines(np.arange(len(pacf)),[-0.6],pacf,'b')

plot_acf_pacf(df1_log_air)

# Differencing by the 12 months forces us to drop the first 12 values. 
df12_air = (df1_log_air - np.roll(df1_log_air,12))[12:]

p = 1
d = 0
q = 1

arima = ARIMA(df12_air, [p, d, q], exog=None, missing='none')
df12_arima_results = arima.fit(trend='c', disp=False)
predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False)

predicted_df12_arima_air = np.roll(df12_air,d)[d:] + predicted_df12_arima

plt.figure(figsize=(20,5))
plt.plot(df12_air) 
plt.plot(predicted_df12_arima_air) # predicted 

df12_arima_results.summary()

import numpy as np
import pylab as plt

beer = np.genfromtxt("beer.txt",delimiter="\n")
year = np.arange(1,beer.shape[-1]+1)

#plt.plot(year,beer)

# some examplary code to calculate the 12-month moving average
months = np.arange(12) 
beer_12mon_avg = np.zeros_like(beer)[:-len(months)]
for i in months:
    beer_12mon_avg += beer[i:(i-len(months))]
    #print beer_ts[2:-10]
beer_12mon_avg /= len(months)
#plot(beer_12mon_avg)

beer_detrend_additive = beer[:-len(months)] - beer_12mon_avg
plot(beer_detrend_additive)

beer_detrend_multi = beer[:-len(months)] / beer_12mon_avg
plot(beer_detrend_multi)

run Wavelets.py