!wget ftp://aftp.cmdl.noaa.gov/data/trace_gases/co2/in-situ/surface/mlo/co2_mlo_surface-insitu_1_ccgg_month.txt
!mv co2_mlo_surface-insitu_1_ccgg_month.txt mlo_co2.txt
import numpy as np
co2 = np.loadtxt("mlo_co2.txt", comments="#", usecols=(3,), unpack=False)[4:] #skiprows=4,
yearmonth = np.loadtxt("mlo_co2.txt", comments="#", usecols=(1,2), dtype=int, unpack=True)[:,4:] #skiprows=4,
print co2.shape, yearmonth.shape
(464,) (2, 464)
import datetime as DT
#assume all days are the first day of month
days = np.ones_like(co2, dtype=int)
#year,doy = datetime.datetime(data[4:,0], data[4:,1], days).strftime('%Y %j').split()
#months = data[4:,1]
dates = []
for i in np.arange(len(days)):
dates.append(DT.datetime(yearmonth[0][i], yearmonth[1][i], days[i]))
print "Mauna Loa CO2 serie starts from %s, and ends at %s"%(dates[0], dates[-1])
Mauna Loa CO2 serie starts from 1974-05-01 00:00:00, and ends at 2012-12-01 00:00:00
plt.figure(figsize=(20,5)) #make figure wider
plt.plot(dates,co2)
[<matplotlib.lines.Line2D at 0x53b7730>]
import scipy.stats as stats
index = np.arange(len(co2))
co2_lin = stats.linregress(index, co2)
co2_predict = co2_lin[0] * index + co2_lin[1]
#figure(figsize=(20,5))
plot(dates, co2)
plot(dates, co2_predict)
title('Mauna Loa CO2 Concentation')
xlabel('Year')
ylabel('CO2 (ppm)')
figtext(0.15, 0.8, 'r = '+str(co2_lin[2]), size='x-large')
figtext(0.15, 0.75, 'intercept = '+str(co2_lin[1]), size='large')
figtext(0.15, 0.7, 'slope = '+str(co2_lin[0]), size='large')
<matplotlib.text.Text at 0x5a41250>
#calculate residuals
co2_lin_redis = co2 - co2_predict
figure(figsize=(20,5))
plot(dates, co2_lin_redis)
[<matplotlib.lines.Line2D at 0x5a157f0>]
import statsmodels.api as sm
tsa = sm.tsa
import statsmodels.formula.api as smf
index_const = sm.add_constant(index)
co2_model = smf.GLM(co2, index_const).fit()
print co2_model.summary()
co2_pr = co2_model.predict()
#plot(dates, co2)
#plot(dates, co2_pr)
Generalized Linear Model Regression Results ============================================================================== Dep. Variable: y No. Observations: 464 Model: GLM Df Residuals: 462 Model Family: Gaussian Df Model: 1 Link Function: identity Scale: 6.35942699131 Method: IRLS Log-Likelihood: -1086.6 Date: Thu, 12 Dec 2013 Deviance: 2938.1 Time: 21:25:06 Pearson chi2: 2.94e+03 No. Iterations: 3 ============================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------ const 327.6770 0.234 1401.740 0.000 327.219 328.135 x1 0.1384 0.001 158.347 0.000 0.137 0.140 ==============================================================================
import statsmodels.api as sm
tsa = sm.tsa
help(tsa.ARMA)
#Testing ARMA(1,1) for Original Series
arma_mod = tsa.ARMA(co2, order=(1,1))
arma_res = arma_mod.fit(trend='c', disp=-1)
arma_pred = arma_res.predict()
print arma_res.params
print "AIC = %f, BIC = %f"%(arma_res.aic,arma_res.bic)
plt.figure(figsize=(20,5))
plot(dates,arma_pred)
plot(dates,co2)
[ 362.86827751 0.99825572 0.69159428] AIC = 1240.736430, BIC = 1257.295968
[<matplotlib.lines.Line2D at 0x543e290>]
from statsmodels.tsa.arima_model import ARIMA
def Plot_ARIMA(p,d,q):
arima = ARIMA(co2, [p, d, q], exog=None, dates=dates, freq='M', missing='none')
arima_results = arima.fit(trend='c', disp=False)
co2_ARIMA = arima_results.predict(exog=None, dynamic=False)
if d>0:
co2_d = co2[d:]-co2[:len(co2)-d]
else:
co2_d = co2
plt.figure(figsize=(20,5))
plt.plot(dates[d:],co2_d)
plt.plot(dates[d:],co2_ARIMA)
return arima_results.aic
aic_001 = Plot_ARIMA(0,0,1) #MA(1), OR ARMA(0,1)
aic_100 = Plot_ARIMA(1,0,0) #AR(1), or ARMA(1,0)
aic_101 = Plot_ARIMA(1,0,1) # ARMA(1,1)
aic_001, aic_100, aic_101
(3415.9859841534144, 1540.2555026390642, 1240.7364300383967)
acf, ci, Q, pvalue = tsa.acf(co2, nlags=24, alpha=0.05, qstat=True, unbiased=True)
subplot(211)
title('ACF')
plot(acf,'b^')
pacf, cip = tsa.pacf(co2, nlags=24, alpha=0.05)
subplot(212)
title('PACF')
plot(pacf,'b^')
vlines(np.arange(len(pacf)),[-0.4],pacf,'b')
#tight_layout()
<matplotlib.collections.LineCollection at 0x7e3ae50>
# diff by 1 month
# Differencing by one month forces us to drop the first (or last) 1 value.
df1_co2 = (co2 - np.roll(co2,1))[1:]
plt.figure(figsize=(20,5))
plt.plot(dates[1:],df1_co2)
#df1_log_co2 = (log_co2 - np.roll(log_co2,1))[1:]
#plt.plot(df1_log_co2)
[<matplotlib.lines.Line2D at 0x7ce5990>]
acf, ci, Q, pvalue = tsa.acf(df1_co2, nlags=24, alpha=0.05, qstat=True, unbiased=True)
subplot(211)
plot(acf,'b^')
pacf, cip = tsa.pacf(df1_co2, nlags=24, alpha=0.05)
subplot(212)
plot(pacf,'b^')
vlines(np.arange(len(pacf)),[-0.6],pacf,'b')
<matplotlib.collections.LineCollection at 0x80ceff0>
# Differencing by the 12 months forces us to drop the first 12 values.
df12_co2 = (co2 - np.roll(co2,12))[12:]
df12_dates = dates[12:]
p = 1
d = 0
q = 1
arima = ARIMA(df12_co2, [p, d, q], exog=None, dates=df12_dates, freq='M', missing='none')
df12_arima_results = arima.fit(trend='c', disp=False)
predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False)
predicted_df12_arima_co2 = np.roll(co2,12)[12+d:] + np.roll(df12_co2,d)[d:] + predicted_df12_arima
plt.figure(figsize=(20,5))
plt.plot(df12_dates,df12_co2)
plt.plot(df12_dates,predicted_df12_arima) # predicted
[<matplotlib.lines.Line2D at 0x5a8f130>]
df12_arima_results.summary(alpha=0.05)
Dep. Variable: | y | No. Observations: | 452 |
---|---|---|---|
Model: | ARMA(1, 1) | Log Likelihood | -213.130 |
Method: | css-mle | S.D. of innovations | 0.387 |
Date: | Thu, 12 Dec 2013 | AIC | 434.260 |
Time: | 21:04:20 | BIC | 450.715 |
Sample: | 05-01-1975 | HQIC | 440.745 |
- 12-01-2012 |
coef | std err | z | P>|z| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
const | 1.6828 | 0.118 | 14.273 | 0.000 | 1.452 1.914 |
ar.L1.y | 0.8990 | 0.027 | 33.706 | 0.000 | 0.847 0.951 |
ma.L1.y | -0.3345 | 0.061 | -5.481 | 0.000 | -0.454 -0.215 |
Real | Imaginary | Modulus | Frequency | |
---|---|---|---|---|
AR.1 | 1.1123 +0.0000j 1.1123 0.0000||||
MA.1 | 2.9900 +0.0000j 2.9900 0.0000
# International airline passengers: monthly totals in thousands.
# from Jan 1949 to Dec 1960
air = [112,118,132,129,121,135,148,148,136,119,104,118,\
115,126,141,135,125,149,170,170,158,133,114,140,\
145,150,178,163,172,178,199,199,184,162,146,166,\
171,180,193,181,183,218,230,242,209,191,172,194,\
196,196,236,235,229,243,264,272,237,211,180,201,\
204,188,235,227,234,264,302,293,259,229,203,229,\
242,233,267,269,270,315,364,347,312,274,237,278,\
284,277,317,313,318,374,413,405,355,306,271,306,\
315,301,356,348,355,422,465,467,404,347,305,336,\
340,318,362,348,363,435,491,505,404,359,310,337,\
360,342,406,396,420,472,548,559,463,407,362,405,\
417,391,419,461,472,535,622,606,508,461,390,432]
plot(air)
[<matplotlib.lines.Line2D at 0x8b05cb0>]
#log transformation
log_air = np.log(air)
subplot(211)
plot(log_air)
#1st order differencing
df1_log_air = log_air[1:] -log_air[:-1]
subplot(212)
plot(df1_log_air)
[<matplotlib.lines.Line2D at 0x8cb5790>]
def plot_acf_pacf(data):
acf, ci, Q, pvalue = tsa.acf(data, nlags=24, alpha=0.05, qstat=True, unbiased=True)
subplot(211)
plot(acf,'b^')
pacf, cip = tsa.pacf(data, nlags=24, alpha=0.05)
subplot(212)
plot(pacf,'b^')
vlines(np.arange(len(pacf)),[-0.6],pacf,'b')
plot_acf_pacf(df1_log_air)
# Differencing by the 12 months forces us to drop the first 12 values.
df12_air = (df1_log_air - np.roll(df1_log_air,12))[12:]
p = 1
d = 0
q = 1
arima = ARIMA(df12_air, [p, d, q], exog=None, missing='none')
df12_arima_results = arima.fit(trend='c', disp=False)
predicted_df12_arima = df12_arima_results.predict(exog=None, dynamic=False)
predicted_df12_arima_air = np.roll(df12_air,d)[d:] + predicted_df12_arima
plt.figure(figsize=(20,5))
plt.plot(df12_air)
plt.plot(predicted_df12_arima_air) # predicted
df12_arima_results.summary()
Dep. Variable: | y | No. Observations: | 131 |
---|---|---|---|
Model: | ARMA(1, 1) | Log Likelihood | 227.133 |
Method: | css-mle | S.D. of innovations | 0.043 |
Date: | Thu, 12 Dec 2013 | AIC | -446.266 |
Time: | 21:17:33 | BIC | -434.765 |
Sample: | 0 | HQIC | -441.593 |
coef | std err | z | P>|z| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
const | 0.0003 | 0.002 | 0.124 | 0.901 | -0.004 0.004 |
ar.L1.y | 0.1448 | 0.245 | 0.590 | 0.556 | -0.336 0.626 |
ma.L1.y | -0.5190 | 0.218 | -2.382 | 0.019 | -0.946 -0.092 |
Real | Imaginary | Modulus | Frequency | |
---|---|---|---|---|
AR.1 | 6.9063 +0.0000j 6.9063 0.0000||||
MA.1 | 1.9268 +0.0000j 1.9268 0.0000
import numpy as np
import pylab as plt
beer = np.genfromtxt("beer.txt",delimiter="\n")
year = np.arange(1,beer.shape[-1]+1)
#plt.plot(year,beer)
# some examplary code to calculate the 12-month moving average
months = np.arange(12)
beer_12mon_avg = np.zeros_like(beer)[:-len(months)]
for i in months:
beer_12mon_avg += beer[i:(i-len(months))]
#print beer_ts[2:-10]
beer_12mon_avg /= len(months)
#plot(beer_12mon_avg)
beer_detrend_additive = beer[:-len(months)] - beer_12mon_avg
plot(beer_detrend_additive)
beer_detrend_multi = beer[:-len(months)] / beer_12mon_avg
plot(beer_detrend_multi)
run Wavelets.py
C:\Python27\lib\site-packages\matplotlib\image.py:349: UserWarning: Images are not supported on non-linear axes. warnings.warn("Images are not supported on non-linear axes.")