# We need a few libraries
import io
import numpy as np
import pandas
import requests
import matplotlib.pyplot as plt
import matplotlib.style
matplotlib.style.use('ggplot')
%matplotlib inline
Let's start with downloading the data from the different tidal guages. We use the 6 main Dutch stations.
stations = [
['Vlissingen', 20, lambda x: x - (6976-46)],
['Hoek van Holland', 22, lambda x:x - (6994 - 121)],
['Den Helder', 23, lambda x: x - (6988-42)],
['Delfzijl', 24, lambda x: x - (6978-155)],
['Harlingen', 25, lambda x: x - (7036-122)],
['IJmuiden', 32, lambda x: x - (7033-83)],
]
# you could use the monthly data
monthly_url = 'http://www.psmsl.org/data/obtaining/rlr.monthly.data/%d.rlrdata'
# and you can use the annual dat
annual_url = 'http://www.psmsl.org/data/obtaining/rlr.annual.data/%d.rlrdata'
# Let's download them both
monthly_dfs = []
annual_dfs = []
for station, id, to_local in stations:
f = io.BytesIO(requests.get(monthly_url % (id,)).content)
df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
# add a column with the station name
df['station'] = station
# convert to local coordinate system
df['nap'] = df['waterlevel'].apply(to_local)
# ignore the part before 1890
df = df[df.year >= 1890]
monthly_dfs.append(df)
f = io.BytesIO(requests.get(annual_url % (id,)).content)
df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
df['station'] = station
df['nap'] = df['waterlevel'].apply(to_local)
df = df[df.year >= 1890]
annual_dfs.append(df)
Now we have downloaded the stations, we can create a new station with the mean. You can use many other techniques, but the mean works just fine here.
# compute the monthly mean
monthly_df = pandas.concat(monthly_dfs)
mean = monthly_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
monthly_df = pandas.concat([monthly_df, mean[['year', 'waterlevel', 'nap', 'station']]])
monthly_df = monthly_df.reset_index()
# add the year number
monthly_df['year_floor'] = np.floor(monthly_df['year'])
annual_df = pandas.concat(annual_dfs)
mean = annual_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
annual_df = pandas.concat([annual_df, mean[['year', 'waterlevel', 'nap', 'station']]])
annual_df = annual_df.reset_index()
# save the files to json
# add the annual mean also to the monthly means
monthly_df = monthly_df.merge(annual_df[['year', 'nap']], left_on='year_floor', right_on='year', suffixes=['', '_annual'])
monthly_df[['year', 'nap', 'nap_annual', 'station']].to_json('monthly.json', orient='records')
annual_df[['year', 'nap', 'station']].to_json('annual.json', orient='records')
df = annual_df
# in memory file
grouped = df.groupby(['station'])
fig, ax = plt.subplots(figsize=(10,7))
for station, df in grouped:
annual_jitter = np.random.uniform(-0.5, 0.5, size=len(df))
monthly_jitter = np.random.uniform(-0.5*1/12.0, 0.5*1/12.0, size=len(df))
if station == 'mean':
pch = '-'
alpha = 1.0
ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
else:
pch = '.'
alpha = 0.5
ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
ax.set_ylabel('waterlevel [cm NAP]')
ax.set_xlabel('year')
legend = ax.legend(loc='best')
legend.legendPatch.set_alpha(0.5)
legend.legendPatch.set_facecolor('white')
fig.savefig('stations.png')
import statsmodels.api as sm
df = grouped.get_group('mean')
y = df['waterlevel']
X = df['year']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
from IPython.display import display
display(results.summary())
Dep. Variable: | waterlevel | R-squared: | 0.828 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.827 |
Method: | Least Squares | F-statistic: | 589.0 |
Date: | Sun, 16 Nov 2014 | Prob (F-statistic): | 1.60e-48 |
Time: | 23:49:03 | Log-Likelihood: | -598.81 |
No. Observations: | 124 | AIC: | 1202. |
Df Residuals: | 122 | BIC: | 1207. |
Df Model: | 1 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
const | 3219.4017 | 149.443 | 21.543 | 0.000 | 2923.564 3515.239 |
year | 1.8581 | 0.077 | 24.269 | 0.000 | 1.707 2.010 |
Omnibus: | 3.101 | Durbin-Watson: | 1.544 |
---|---|---|---|
Prob(Omnibus): | 0.212 | Jarque-Bera (JB): | 2.704 |
Skew: | -0.357 | Prob(JB): | 0.259 |
Kurtosis: | 3.120 | Cond. No. | 1.06e+05 |
def fit(startyear):
df_selected = df[df['year'] > startyear]
y = df_selected['waterlevel']
X = df_selected['year']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
display(results.summary())
from IPython.html.widgets import interactive
interactive(fit, startyear=(1890, 1980, 10))
Dep. Variable: | waterlevel | R-squared: | 0.681 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.677 |
Method: | Least Squares | F-statistic: | 172.6 |
Date: | Sun, 16 Nov 2014 | Prob (F-statistic): | 8.98e-22 |
Time: | 23:49:06 | Log-Likelihood: | -403.92 |
No. Observations: | 83 | AIC: | 811.8 |
Df Residuals: | 81 | BIC: | 816.7 |
Df Model: | 1 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
const | 3107.2138 | 287.403 | 10.811 | 0.000 | 2535.372 3679.056 |
year | 1.9146 | 0.146 | 13.138 | 0.000 | 1.625 2.205 |
Omnibus: | 3.114 | Durbin-Watson: | 1.547 |
---|---|---|---|
Prob(Omnibus): | 0.211 | Jarque-Bera (JB): | 2.643 |
Skew: | -0.434 | Prob(JB): | 0.267 |
Kurtosis: | 3.101 | Cond. No. | 1.62e+05 |
def plot(startyear):
df_selected = df[df['year'] > startyear]
y = df_selected['waterlevel']
X = df_selected['year']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
fig, ax = plt.subplots()
ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
ax.plot(df_selected['year'], results.fittedvalues)
ax.set_ylim(6400,7300)
interactive(plot, startyear=(1860, 1980, 10))
from statsmodels.sandbox.regression.predstd import wls_prediction_std
def plot(startyear):
df_selected = df[df['year'] > startyear]
y = df_selected['waterlevel']
X = df_selected['year']
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
fig, ax = plt.subplots()
ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
ax.plot(df_selected['year'], results.fittedvalues)
fit, lower, upper = wls_prediction_std(results)
plt.fill_between(df_selected['year'], lower, upper, alpha=0.3)
ax.set_ylim(6400,7300)
import statsmodels.sandbox.regression.predstd
interactive(plot, startyear=(1860, 1980, 10))
%load_ext rpy2.ipython
The rpy2.ipython extension is already loaded. To reload it, use: %reload_ext rpy2.ipython
%Rpush df
%R library('ggplot2')
<StrVector - Python:0x114c18998 / R:0x7ff2298824a8> [str, str, str, ..., str, str, str]
def test(startyear):
df_selected = df[df['year']>startyear]
%Rpush df_selected
%R print(ggplot(df, aes(year, waterlevel)) + geom_point(alpha=0.3) + stat_smooth(data=df_selected, method='loess'))
interactive(test, startyear=(1860, 1980, 10))
def plot(startyear):
df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*2))]
y = df_selected['waterlevel']
X = np.c_[df_selected['year']-1970, (df_selected['year']-1970)**2]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
fig, ax = plt.subplots()
ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)
interactive(plot, startyear=(1860, 1980, 5))
def plot(startyear, n_periods):
df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*n_periods))]
y = df_selected['waterlevel']
X = np.c_[
df_selected['year']-1970,
np.cos(2*np.pi*(df_selected['year']-1970)/18.613),
np.sin(2*np.pi*(df_selected['year']-1970)/18.613)
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
# e^{i\theta} = \cos\theta + i\sin\theta
phase = np.angle(complex(results.params['x2'], results.params['x3']))
ampl = np.sqrt(results.params['x2']**2 + results.params['x3']**2)
# plot it
fig, ax = plt.subplots()
ax.set_title("Phase (rad): %.1f, amplitude: %.1f\nA(cos): %.1f, B(sin): %.1f" % (phase, ampl, results.params['x2'], results.params['x3']))
ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)
interactive(plot, startyear=(1860, 1980, 5), n_periods=(2,5))
df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
X = np.c_[
df['year']-1970,
np.cos(2*np.pi*(df['year']-1970)/18.613),
np.sin(2*np.pi*(df['year']-1970)/18.613),
df['U2cos'],
df['U2sin']
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
linear = results.params['const'] + results.params['x1'] * (df['year']-1970)
nodal = results.params['x2'] * np.cos(2*np.pi*(df['year']-1970)/18.613) + results.params['x3'] * np.sin(2*np.pi*(df['year']-1970)/18.613)
wind = results.params['x4'] * df['U2cos'] + results.params['x5'] * df['U2sin']
df['linear'] = linear
df['nodal'] = nodal
df['wind'] = wind
df['residual'] = results.resid
df['loess'] = sm.nonparametric.lowess(df['residual'], df['year'], frac=60.0/len(df))[:,1]
df['residual_loess'] = df['residual'] - df['loess']
df[['year', 'nap', 'station', 'linear', 'nodal', 'residual', 'wind', 'loess', 'residual_loess']].to_json('fit.json', orient='records')
y = df['nap']
X = np.c_[
np.ones(shape=(len(df),))
]
# compute semipartial correlations
rsquares = []
terms = [df['year']-1970,
np.c_[np.cos(2*np.pi*(df['year']-1970)/18.613), np.sin(2*np.pi*(df['year']-1970)/18.613)],
np.c_[df['U2cos'], df['U2sin']]]
names = ['linear', 'nodal', 'wind']
rsquare = 0
for name, term in zip(names, terms):
X = np.c_[X, term]
model = sm.OLS(y, X)
results = model.fit()
rsquares.append((name, results.rsquared - rsquare))
rsquare = results.rsquared
rsquares.append(('loess', 1- np.var(df['residual_loess'])/np.var(df['nap'])- rsquare))
rsquare = results.rsquared
rsquares.append(('residual', np.var(df['residual_loess'])/np.var(df['nap'])))
import json
json.dump(dict(rsquares), open('rsquares.json', 'w'))
rsquares
[('linear', 0.82769308510876782), ('nodal', 0.019216129938318116), ('wind', 0.043763073036397149), ('loess', 0.0036103727985814515), ('residual', 0.10571733911793549)]
a = results.summary(yname='waterlevel', xname=['constant', 'linear', 'nodal cos', 'nodal sin', 'wind cos', 'wind sin'])
#print(a.as_latex())
a
Dep. Variable: | waterlevel | R-squared: | 0.891 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.886 |
Method: | Least Squares | F-statistic: | 190.6 |
Date: | Sun, 16 Nov 2014 | Prob (F-statistic): | 1.71e-54 |
Time: | 23:49:24 | Log-Likelihood: | -566.06 |
No. Observations: | 123 | AIC: | 1144. |
Df Residuals: | 117 | BIC: | 1161. |
Df Model: | 5 |
coef | std err | t | P>|t| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
constant | -44.6608 | 6.507 | -6.863 | 0.000 | -57.548 -31.774 |
linear | 1.7577 | 0.067 | 26.278 | 0.000 | 1.625 1.890 |
nodal cos | 3.7984 | 3.233 | 1.175 | 0.242 | -2.604 10.201 |
nodal sin | -11.8578 | 3.138 | -3.778 | 0.000 | -18.073 -5.642 |
wind cos | 1.2649 | 0.189 | 6.694 | 0.000 | 0.891 1.639 |
wind sin | -0.4672 | 0.266 | -1.753 | 0.082 | -0.995 0.060 |
Omnibus: | 0.139 | Durbin-Watson: | 1.435 |
---|---|---|---|
Prob(Omnibus): | 0.933 | Jarque-Bera (JB): | 0.311 |
Skew: | 0.013 | Prob(JB): | 0.856 |
Kurtosis: | 2.755 | Cond. No. | 122. |
def plot(degree):
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
%Rpush fitted
%Rpush degree
%R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
l = %Rget l
import pandas.rpy.common as com
l = com.load_data('l')
plt.fill_between(np.arange(1890, 2101), l['fit'] - l['se.fit']*1.96, l['fit'] + l['se.fit']*1.96, alpha=0.5)
plt.plot(np.arange(1890, 2101), l['fit'])
plt.ylim(-200,400)
plt.title('Loess model with degree %d' % degree)
plt.ylabel("sea surface height residual ['mm']")
interactive(plot, degree=(1,2))
# The broken linear model
y = df['nap']
X = np.c_[
df['year']-1970,
np.cos(2*np.pi*(df['year']-1970)/18.613),
np.sin(2*np.pi*(df['year']-1970)/18.613),
df['U2cos'],
df['U2sin'],
(df['year']-1990 > 0)*(df['year']-1990)
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
fit = model.fit()
%Rpush df
%R fit <- lm(nap ~ year + I((year > 1990)*(year-1990)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
%R years <- seq(1890, 2100)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
pred = %Rget pred
%R print(summary(fit))
Call: lm(formula = nap ~ year + I((year > 1990) * (year - 1990)) + I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin), data = df) Residuals: Min 1Q Median 3Q Max -66.682 -19.736 2.081 19.674 63.102 Coefficients: Estimate Std. Error (Intercept) -3.382e+03 1.867e+02 year 1.688e+00 9.715e-02 I((year > 1990) * (year - 1990)) 8.555e-01 6.372e-01 I(cos((2 * pi * year - 1970)/18.613)) 5.595e+00 3.709e+00 I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin) 7.036e-01 1.751e-01 t value Pr(>|t|) (Intercept) -18.112 < 2e-16 *** year 17.378 < 2e-16 *** I((year > 1990) * (year - 1990)) 1.343 0.181947 I(cos((2 * pi * year - 1970)/18.613)) 1.508 0.134162 I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin) 4.018 0.000104 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 28.58 on 118 degrees of freedom Multiple R-squared: 0.8528, Adjusted R-squared: 0.8478 F-statistic: 170.9 on 4 and 118 DF, p-value: < 2.2e-16
def compute(split):
%Rpush split
%Rpush df
%R fit <- lm(nap ~ year + I((year > split)*(year-split)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
%R years <- seq(1890, 2100)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
pred = %Rget pred
%R print(summary(fit))
interactive(compute, split=(1980, 2000))
Call: lm(formula = nap ~ year + I((year > split) * (year - split)) + I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin), data = df) Residuals: Min 1Q Median 3Q Max -66.682 -19.736 2.081 19.674 63.102 Coefficients: Estimate Std. Error (Intercept) -3.382e+03 1.867e+02 year 1.688e+00 9.715e-02 I((year > split) * (year - split)) 8.555e-01 6.372e-01 I(cos((2 * pi * year - 1970)/18.613)) 5.595e+00 3.709e+00 I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin) 7.036e-01 1.751e-01 t value Pr(>|t|) (Intercept) -18.112 < 2e-16 *** year 17.378 < 2e-16 *** I((year > split) * (year - split)) 1.343 0.181947 I(cos((2 * pi * year - 1970)/18.613)) 1.508 0.134162 I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin) 4.018 0.000104 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 28.58 on 118 degrees of freedom Multiple R-squared: 0.8528, Adjusted R-squared: 0.8478 F-statistic: 170.9 on 4 and 118 DF, p-value: < 2.2e-16
plt.fill_between(np.arange(1890, 2101), np.array(pred)[:,1], np.array(pred)[:,2], alpha=0.5)
plt.plot(np.arange(1890, 2101), np.array(pred)[:,0])
[<matplotlib.lines.Line2D at 0x1140ae490>]
# The linear model
%Rpush df
%R fit <- lm(nap ~ year + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613)) + U2cos + U2sin, data=df)
%R years <- seq(1890, 2100)
# Padd with means
%R U2cos = df$U2cos
%R U2cos[years>2012] = mean(df$U2cos)
%R U2sin = df$U2cos
%R U2sin[years>2012] = mean(df$U2sin)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=U2cos, U2sin=U2sin))
pred = %Rget pred
%R print(summary(fit))
Call: lm(formula = nap ~ year + I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 1970)/18.613)) + U2cos + U2sin, data = df) Residuals: Min 1Q Median 3Q Max -59.919 -17.607 1.216 15.672 59.747 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -3.507e+03 1.289e+02 -27.207 < 2e-16 year 1.758e+00 6.689e-02 26.278 < 2e-16 I(cos((2 * pi * year - 1970)/18.613)) 3.427e+00 3.233e+00 1.060 0.29120 I(sin((2 * pi * year - 1970)/18.613)) -1.197e+01 3.139e+00 -3.814 0.00022 U2cos 1.265e+00 1.890e-01 6.694 7.94e-10 U2sin -4.672e-01 2.665e-01 -1.753 0.08214 (Intercept) *** year *** I(cos((2 * pi * year - 1970)/18.613)) I(sin((2 * pi * year - 1970)/18.613)) *** U2cos *** U2sin . --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 24.73 on 117 degrees of freedom Multiple R-squared: 0.8907, Adjusted R-squared: 0.886 F-statistic: 190.6 on 5 and 117 DF, p-value: < 2.2e-16
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 1
%Rpush fitted
%Rpush degree
%R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
l = %Rget l
import pandas.rpy.common as com
l = com.load_data('l')
ci_loess = pandas.DataFrame(data=dict(year=np.arange(1890, 2101), se=l['se.fit'], fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))
# compute angle
beta = (ci_loess[ci_loess['year'] == 2100].fit.item() - ci_loess[ci_loess['year'] == 2013].fit.item())/(2100-2013)
# compute std error (1.96 -> z 5%, 2 two-tailed)
se = ((ci_loess[ci_loess['year'] == 2100].upr.item() - ci_loess[ci_loess['year'] == 2100].lwr.item()))/1.96/2.0/(2100-2013)
beta, se
(0.27802282630813135, 0.23436251419913748)
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 2
%Rpush fitted
%Rpush degree
%R fit <- loess(fitted, degree=degree, control=loess.control(surface='direct'))
%R l <- predict(fit, se=TRUE, newdata=data.frame(year=seq(1890,2100)))
import pandas.rpy.common as com
l = com.load_data('l')
fit = %Rget fit
ci_loess2 = pandas.DataFrame(data=dict(year=np.arange(1890, 2101), fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))
#ci_loess = ci_loess.set_index('year')
ci_linear = com.load_data("pred")
ci_linear['year'] = np.arange(1890, 2101)
#ci_linear= ci_linear.set_index('year')
ci = pandas.merge(ci_loess, ci_loess2, on='year', suffixes=('_loess', '_loess2'))
ci = pandas.merge(ci_linear, ci, on='year', suffixes=('_linear', ''))
#ci['fit'].name('fit_linear')
#ci['lwr'].rename('lwr_linear')
#ci['upr'].rename('upr_linear')
ci.rename(columns={'fit':'fit_linear', 'lwr': 'lwr_linear', 'upr': 'upr_linear'}, inplace=True)
ci.to_csv('ci.csv')
plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] + ci_loess['lwr'], ci_linear['upr'] + ci_loess['upr'])
plt.plot(np.arange(1890,2101), ci_linear['fit'] + ci_loess['fit'])
plt.plot(np.arange(1890,2101), ci_linear['fit'], color='black')
[<matplotlib.lines.Line2D at 0x114c1b7d0>]
plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] , ci_linear['upr'] )
plt.plot(np.arange(1890,2101), ci_linear['fit'])
plt.fill_between(np.arange(1890,2101), ci_loess['lwr'] , ci_loess['upr'] )
plt.plot(np.arange(1890,2101), ci_loess['fit'])
[<matplotlib.lines.Line2D at 0x11390aad0>]
df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
plt.subplots(figsize=(10,6))
for split in np.arange(1990, 2000):
X = np.c_[
df['year']-1970,
(df['year']-split)*(df['year']>split),
np.cos(2*np.pi*(df['year']-1970)/18.613),
np.sin(2*np.pi*(df['year']-1970)/18.613),
df['U2cos'],
df['U2sin']
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
plt.plot(df['year'], results.fittedvalues, label=str(split), alpha=0.3)
# Based on the records of Amsterdam/Den Helder
df = pandas.read_csv('/Users/baart_f/src/oetpython/applications/sealevel//sealevel/static/data/extra/9000.csv')
# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
# compute a trend
def trend(index):
# Select based on index
df_selected = df.ix[index]
# Create a linear model
y = df_selected['waterlevel']
X = np.c_[
df_selected['year.month'],
np.cos(2*np.pi*(df_selected['year.month']-1970)/18.613),
np.sin(2*np.pi*(df_selected['year.month']-1970)/18.613)
]
X = statsmodels.regression.linear_model.add_constant(X)
model= statsmodels.regression.linear_model.OLS(y, X)
# fit the model
result = model.fit()
# ignore if we have missings
if np.isnan(df_selected['year.month']).any():
return np.nan
# create a plot
ax1.plot(df_selected['year.month'], result.fittedvalues, 'k-', alpha=0.2, linewidth=3)
# return the trend
return result.params['x1']
df['waterlevel'][df['waterlevel'] == -99999] = np.nan
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')
df = df[df['year.month']<1890]
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
fig.savefig('amsterdam.pdf')
# standard error of beta_1 and confidence range
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)])
(0.17157195469765205, 0.67256206241479599)
# the same for the whole of amsterdam
resp = requests.get('http://www.psmsl.org/data/longrecords/amsterdam.sea.level')
f = io.BytesIO(resp.content)
df = pandas.read_fwf(f, widths=(10,20), names=('year.month', 'waterlevel'), skiprows=1)
df['waterlevel']/=10.0
# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)])
(0.17411785038363373, 0.68254197350384416)