!wget ftp://aftp.cmdl.noaa.gov/data/trace_gases/co2/in-situ/surface/mlo/co2_mlo_surface-insitu_1_ccgg_month.txt !mv co2_mlo_surface-insitu_1_ccgg_month.txt mlo_co2.txt import numpy as np co2 = np.loadtxt("mlo_co2.txt", comments="#", usecols=(3,), unpack=False)[4:] #skiprows=4, yearmonth = np.loadtxt("mlo_co2.txt", comments="#", usecols=(1,2), dtype=int, unpack=True)[:,4:] #skiprows=4, print co2.shape, yearmonth.shape import datetime as DT #assume all days are the first day of month days = np.ones_like(co2, dtype=int) #year,doy = datetime.datetime(data[4:,0], data[4:,1], days).strftime('%Y %j').split() #months = data[4:,1] dates = [] for i in np.arange(len(days)): dates.append(DT.datetime(yearmonth[0][i], yearmonth[1][i], days[i])) print "Mauna Loa CO2 serie starts from %s, and ends at %s"%(dates[0], dates[-1]) plt.figure(figsize=(20,5)) #make figure wider plt.plot(dates,co2) Alternatively, we can use relativedelta to generatoe a series of dates. Sample code: firstday = numdays = dateList = [ firstday + relativedelta(days=x) for x in range(0,numdays) ] import scipy.stats as stats index = np.arange(len(co2)) co2_lin = stats.linregress(index, co2) co2_predict = co2_lin[0] * index + co2_lin[1] #figure(figsize=(20,5)) plot(dates, co2) plot(dates, co2_predict) title('Mauna Loa CO2 Concentation') xlabel('Year') ylabel('CO2 (ppm)') figtext(0.15, 0.8, 'r = '+str(co2_lin[2]), size='x-large') figtext(0.15, 0.75, 'intercept = '+str(co2_lin[1]), size='large') figtext(0.15, 0.7, 'slope = '+str(co2_lin[0]), size='large') #calculate residuals co2_lin_redis = co2 - co2_predict figure(figsize=(20,5)) plot(dates, co2_lin_redis) import statsmodels.api as sm tsa = sm.tsa import statsmodels.formula.api as smf index_const = sm.add_constant(index) co2_model = smf.GLM(co2, index_const).fit() print co2_model.summary() co2_pr = co2_model.predict() #plot(dates, co2) #plot(dates, co2_pr) import statsmodels.api as sm tsa = sm.tsa help(tsa.ARMA) #Testing ARMA(1,1) for Original Series arma_mod = tsa.ARMA(co2, order=(1,1)) arma_res = arma_mod.fit(trend='c', disp=-1) arma_pred = arma_res.predict() print arma_res.params print "AIC = %f, BIC = %f"%(arma_res.aic,arma_res.bic) plt.figure(figsize=(20,5)) plot(dates,arma_pred) plot(dates,co2) from statsmodels.tsa.arima_model import ARIMA def Plot_ARIMA(p,d,q): arima = ARIMA(co2, [p, d, q], exog=None, dates=dates, freq='M', missing='none') arima_results = arima.fit(trend='c', disp=False) co2_ARIMA = arima_results.predict(exog=None, dynamic=False) if d>0: co2_d = co2[d:]-co2[:len(co2)-d] else: co2_d = co2 plt.figure(figsize=(20,5)) plt.plot(dates[d:],co2_d) plt.plot(dates[d:],co2_ARIMA) return arima_results.aic aic_001 = Plot_ARIMA(0,0,1) #MA(1), OR ARMA(0,1) aic_100 = Plot_ARIMA(1,0,0) #AR(1), or ARMA(1,0) aic_101 = Plot_ARIMA(1,0,1) # ARMA(1,1) aic_001, aic_100, aic_101 acf, ci, Q, pvalue = tsa.acf(co2, nlags=24, alpha=0.05, qstat=True, unbiased=True) subplot(211) title('ACF') plot(acf,'b^') pacf, cip = tsa.pacf(co2, nlags=24, alpha=0.05) subplot(212) title('PACF') plot(pacf,'b^') vlines(np.arange(len(pacf)),[-0.4],pacf,'b') #tight_layout() # diff by 1 month # Differencing by one month forces us to drop the first (or last) 1 value. df1_co2 = (co2 - np.roll(co2,1))[1:] plt.figure(figsize=(20,5)) plt.plot(dates[1:],df1_co2) #df1_log_co2 = (log_co2 - np.roll(log_co2,1))[1:] #plt.plot(df1_log_co2) acf, ci, Q, pvalue = tsa.acf(df1_co2, nlags=24, alpha=0.05, qstat=True, unbiased=True) subplot(211) plot(acf,'b^') pacf, cip = tsa.pacf(df1_co2, nlags=24, alpha=0.05) subplot(212) plot(pacf,'b^') vlines(np.arange(len(pacf)),[-0.6],pacf,'b') # Differencing by the 12 months forces us to drop the first 12 values. df12_co2 = (co2 - np.roll(co2,12))[12:] df12_dates = dates[12:] p = 1 d = 0 q = 1 arima = ARIMA(df12_co2, [p, d, q], exog=None, dates=df12_dates, freq='M', missing='none') df12_arima_results = arima.fit(trend='c', disp=False) predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False) predicted_df12_arima_co2 = np.roll(co2,12)[12+d:] + np.roll(df12_co2,d)[d:] + predicted_df12_arima plt.figure(figsize=(20,5)) plt.plot(df12_dates,df12_co2) plt.plot(df12_dates,predicted_df12_arima) # predicted df12_arima_results.summary(alpha=0.05) # International airline passengers: monthly totals in thousands. # from Jan 1949 to Dec 1960 air = [112,118,132,129,121,135,148,148,136,119,104,118,\ 115,126,141,135,125,149,170,170,158,133,114,140,\ 145,150,178,163,172,178,199,199,184,162,146,166,\ 171,180,193,181,183,218,230,242,209,191,172,194,\ 196,196,236,235,229,243,264,272,237,211,180,201,\ 204,188,235,227,234,264,302,293,259,229,203,229,\ 242,233,267,269,270,315,364,347,312,274,237,278,\ 284,277,317,313,318,374,413,405,355,306,271,306,\ 315,301,356,348,355,422,465,467,404,347,305,336,\ 340,318,362,348,363,435,491,505,404,359,310,337,\ 360,342,406,396,420,472,548,559,463,407,362,405,\ 417,391,419,461,472,535,622,606,508,461,390,432] plot(air) #log transformation log_air = np.log(air) subplot(211) plot(log_air) #1st order differencing df1_log_air = log_air[1:] -log_air[:-1] subplot(212) plot(df1_log_air) def plot_acf_pacf(data): acf, ci, Q, pvalue = tsa.acf(data, nlags=24, alpha=0.05, qstat=True, unbiased=True) subplot(211) plot(acf,'b^') pacf, cip = tsa.pacf(data, nlags=24, alpha=0.05) subplot(212) plot(pacf,'b^') vlines(np.arange(len(pacf)),[-0.6],pacf,'b') plot_acf_pacf(df1_log_air) # Differencing by the 12 months forces us to drop the first 12 values. df12_air = (df1_log_air - np.roll(df1_log_air,12))[12:] p = 1 d = 0 q = 1 arima = ARIMA(df12_air, [p, d, q], exog=None, missing='none') df12_arima_results = arima.fit(trend='c', disp=False) predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False) predicted_df12_arima_air = np.roll(df12_air,d)[d:] + predicted_df12_arima plt.figure(figsize=(20,5)) plt.plot(df12_air) plt.plot(predicted_df12_arima_air) # predicted df12_arima_results.summary() import numpy as np import pylab as plt beer = np.genfromtxt("beer.txt",delimiter="\n") year = np.arange(1,beer.shape[-1]+1) #plt.plot(year,beer) # some examplary code to calculate the 12-month moving average months = np.arange(12) beer_12mon_avg = np.zeros_like(beer)[:-len(months)] for i in months: beer_12mon_avg += beer[i:(i-len(months))] #print beer_ts[2:-10] beer_12mon_avg /= len(months) #plot(beer_12mon_avg) beer_detrend_additive = beer[:-len(months)] - beer_12mon_avg plot(beer_detrend_additive) beer_detrend_multi = beer[:-len(months)] / beer_12mon_avg plot(beer_detrend_multi) run Wavelets.py