Sea-level rise¶

In [48]:

# We need a few libraries
import io

import numpy as np
import pandas
import requests

import matplotlib.pyplot as plt
import matplotlib.style
matplotlib.style.use('ggplot')
%matplotlib inline

Let's start with downloading the data from the different tidal guages. We use the 6 main Dutch stations.

http://www.psmsl.org/data/obtaining/stations/23.php Den Helder
http://www.psmsl.org/data/obtaining/stations/32.php IJmuiden
http://www.psmsl.org/data/obtaining/stations/20.php Vlissingen
http://www.psmsl.org/data/obtaining/stations/25.php Harlingen
http://www.psmsl.org/data/obtaining/stations/22.php Hoek van Holland
http://www.psmsl.org/data/obtaining/stations/24.php Delfzijl

In [49]:

stations = [
    ['Vlissingen', 20, lambda x: x - (6976-46)],
    ['Hoek van Holland', 22, lambda x:x - (6994 - 121)],
    ['Den Helder', 23, lambda x: x - (6988-42)],
    ['Delfzijl', 24, lambda x: x - (6978-155)],
    ['Harlingen', 25, lambda x: x - (7036-122)],
    ['IJmuiden', 32, lambda x: x - (7033-83)],
]

# you could use the monthly data
monthly_url = 'http://www.psmsl.org/data/obtaining/rlr.monthly.data/%d.rlrdata'
# and you can use the annual dat
annual_url = 'http://www.psmsl.org/data/obtaining/rlr.annual.data/%d.rlrdata'

In [50]:

# Let's download them both
monthly_dfs = []
annual_dfs = []

for station, id, to_local in stations:
    f = io.BytesIO(requests.get(monthly_url % (id,)).content)
    df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
    # add a column with the station name
    df['station'] = station
    # convert to local coordinate system
    df['nap'] = df['waterlevel'].apply(to_local)
    # ignore the part before 1890
    df = df[df.year >= 1890]
    monthly_dfs.append(df)
    
    f = io.BytesIO(requests.get(annual_url % (id,)).content)
    df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
    df['station'] = station
    df['nap'] = df['waterlevel'].apply(to_local)
    df = df[df.year >= 1890]
    annual_dfs.append(df)    

Now we have downloaded the stations, we can create a new station with the mean. You can use many other techniques, but the mean works just fine here.

In [51]:

# compute the monthly mean
monthly_df = pandas.concat(monthly_dfs)
mean = monthly_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
monthly_df = pandas.concat([monthly_df, mean[['year', 'waterlevel', 'nap', 'station']]])
monthly_df = monthly_df.reset_index()
# add the year number 
monthly_df['year_floor'] = np.floor(monthly_df['year'])

annual_df = pandas.concat(annual_dfs)
mean = annual_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
annual_df = pandas.concat([annual_df, mean[['year', 'waterlevel', 'nap', 'station']]])
annual_df = annual_df.reset_index()

# save the files to json
# add the annual mean also to the monthly means
monthly_df = monthly_df.merge(annual_df[['year', 'nap']], left_on='year_floor', right_on='year', suffixes=['', '_annual'])
monthly_df[['year', 'nap', 'nap_annual', 'station']].to_json('monthly.json', orient='records')
annual_df[['year', 'nap', 'station']].to_json('annual.json', orient='records')

df = annual_df

In [52]:

# in memory file
grouped = df.groupby(['station'])

In [53]:

fig, ax = plt.subplots(figsize=(10,7))
for station, df in grouped:
    annual_jitter = np.random.uniform(-0.5, 0.5, size=len(df))
    monthly_jitter = np.random.uniform(-0.5*1/12.0, 0.5*1/12.0, size=len(df))
    if station == 'mean':
        pch = '-'
        alpha = 1.0
        ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
    else:
        pch = '.'
        alpha = 0.5
        ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
    ax.set_ylabel('waterlevel [cm NAP]')
    ax.set_xlabel('year')
legend = ax.legend(loc='best')
legend.legendPatch.set_alpha(0.5)
legend.legendPatch.set_facecolor('white')

fig.savefig('stations.png')

In [54]:

import statsmodels.api as sm
df = grouped.get_group('mean')

In [55]:

y = df['waterlevel']
X = df['year']
X = sm.add_constant(X)

In [56]:

model = sm.OLS(y, X)
results = model.fit()
from IPython.display import display
display(results.summary())

OLS Regression Results
Dep. Variable:	waterlevel	R-squared:	0.828
Model:	OLS	Adj. R-squared:	0.827
Method:	Least Squares	F-statistic:	589.0
Date:	Sun, 16 Nov 2014	Prob (F-statistic):	1.60e-48
Time:	23:49:03	Log-Likelihood:	-598.81
No. Observations:	124	AIC:	1202.
Df Residuals:	122	BIC:	1207.
Df Model:	1

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
const	3219.4017	149.443	21.543	0.000	2923.564 3515.239
year	1.8581	0.077	24.269	0.000	1.707 2.010

Omnibus:	3.101	Durbin-Watson:	1.544
Prob(Omnibus):	0.212	Jarque-Bera (JB):	2.704
Skew:	-0.357	Prob(JB):	0.259
Kurtosis:	3.120	Cond. No.	1.06e+05

In [57]:

def fit(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    display(results.summary())

In [58]:

from IPython.html.widgets import interactive

In [59]:

interactive(fit, startyear=(1890, 1980, 10))

OLS Regression Results
Dep. Variable:	waterlevel	R-squared:	0.681
Model:	OLS	Adj. R-squared:	0.677
Method:	Least Squares	F-statistic:	172.6
Date:	Sun, 16 Nov 2014	Prob (F-statistic):	8.98e-22
Time:	23:49:06	Log-Likelihood:	-403.92
No. Observations:	83	AIC:	811.8
Df Residuals:	81	BIC:	816.7
Df Model:	1

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
const	3107.2138	287.403	10.811	0.000	2535.372 3679.056
year	1.9146	0.146	13.138	0.000	1.625 2.205

Omnibus:	3.114	Durbin-Watson:	1.547
Prob(Omnibus):	0.211	Jarque-Bera (JB):	2.643
Skew:	-0.434	Prob(JB):	0.267
Kurtosis:	3.101	Cond. No.	1.62e+05

In [60]:

def plot(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(df_selected['year'], results.fittedvalues)
    
    ax.set_ylim(6400,7300)

In [61]:

interactive(plot, startyear=(1860, 1980, 10))

In [62]:

from statsmodels.sandbox.regression.predstd import wls_prediction_std

def plot(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(df_selected['year'], results.fittedvalues)
    fit, lower, upper = wls_prediction_std(results)

    plt.fill_between(df_selected['year'], lower, upper, alpha=0.3)
    
    ax.set_ylim(6400,7300)

In [63]:

import statsmodels.sandbox.regression.predstd

In [64]:

interactive(plot, startyear=(1860, 1980, 10))

In [65]:

%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython

In [66]:

%Rpush df
%R library('ggplot2')

Out[66]:

<StrVector - Python:0x114c18998 / R:0x7ff2298824a8>
[str, str, str, ..., str, str, str]

In [67]:

def test(startyear):
    df_selected = df[df['year']>startyear]
    %Rpush df_selected
    %R print(ggplot(df, aes(year, waterlevel)) + geom_point(alpha=0.3) + stat_smooth(data=df_selected, method='loess'))
interactive(test, startyear=(1860, 1980, 10))

In [68]:

def plot(startyear):
    df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*2))]
    
    y = df_selected['waterlevel']
    X = np.c_[df_selected['year']-1970, (df_selected['year']-1970)**2]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)

In [69]:

interactive(plot, startyear=(1860, 1980, 5))

In [70]:

def plot(startyear, n_periods):

    df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*n_periods))]
    
    y = df_selected['waterlevel']
    X = np.c_[
        df_selected['year']-1970, 
        np.cos(2*np.pi*(df_selected['year']-1970)/18.613),
        np.sin(2*np.pi*(df_selected['year']-1970)/18.613)

    ]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    # e^{i\theta} = \cos\theta + i\sin\theta
    phase = np.angle(complex(results.params['x2'], results.params['x3']))
    ampl = np.sqrt(results.params['x2']**2 + results.params['x3']**2)
    
    # plot it
    fig, ax = plt.subplots()
    ax.set_title("Phase (rad): %.1f, amplitude: %.1f\nA(cos): %.1f, B(sin): %.1f" % (phase, ampl, results.params['x2'], results.params['x3']))
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)
    

In [71]:

interactive(plot, startyear=(1860, 1980, 5), n_periods=(2,5))

In [72]:

df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
X = np.c_[
    df['year']-1970, 
    np.cos(2*np.pi*(df['year']-1970)/18.613),
    np.sin(2*np.pi*(df['year']-1970)/18.613),
    df['U2cos'], 
    df['U2sin']
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
linear = results.params['const'] + results.params['x1'] * (df['year']-1970)
nodal = results.params['x2'] * np.cos(2*np.pi*(df['year']-1970)/18.613) + results.params['x3'] * np.sin(2*np.pi*(df['year']-1970)/18.613)
wind = results.params['x4'] * df['U2cos'] + results.params['x5'] * df['U2sin']
df['linear'] = linear
df['nodal'] = nodal
df['wind'] = wind
df['residual'] = results.resid
df['loess'] = sm.nonparametric.lowess(df['residual'], df['year'], frac=60.0/len(df))[:,1]
df['residual_loess'] = df['residual'] - df['loess']

In [73]:

df[['year', 'nap', 'station', 'linear', 'nodal', 'residual', 'wind', 'loess', 'residual_loess']].to_json('fit.json', orient='records')

In [74]:

y = df['nap']
X = np.c_[
    np.ones(shape=(len(df),))
]
# compute semipartial correlations
rsquares = []
terms = [df['year']-1970, 
              np.c_[np.cos(2*np.pi*(df['year']-1970)/18.613), np.sin(2*np.pi*(df['year']-1970)/18.613)],
              np.c_[df['U2cos'], df['U2sin']]]
names = ['linear', 'nodal', 'wind']
rsquare = 0
for name, term in zip(names, terms):
    X = np.c_[X, term]
    model = sm.OLS(y, X)
    results = model.fit()
    rsquares.append((name, results.rsquared - rsquare))
    rsquare = results.rsquared
rsquares.append(('loess', 1- np.var(df['residual_loess'])/np.var(df['nap'])- rsquare))
rsquare = results.rsquared
rsquares.append(('residual', np.var(df['residual_loess'])/np.var(df['nap'])))


import json
json.dump(dict(rsquares), open('rsquares.json', 'w'))
rsquares

Out[74]:

[('linear', 0.82769308510876782),
 ('nodal', 0.019216129938318116),
 ('wind', 0.043763073036397149),
 ('loess', 0.0036103727985814515),
 ('residual', 0.10571733911793549)]

In [75]:

a = results.summary(yname='waterlevel', xname=['constant', 'linear', 'nodal cos', 'nodal sin', 'wind cos', 'wind sin'])
#print(a.as_latex())
a

Out[75]:

OLS Regression Results
Dep. Variable:	waterlevel	R-squared:	0.891
Model:	OLS	Adj. R-squared:	0.886
Method:	Least Squares	F-statistic:	190.6
Date:	Sun, 16 Nov 2014	Prob (F-statistic):	1.71e-54
Time:	23:49:24	Log-Likelihood:	-566.06
No. Observations:	123	AIC:	1144.
Df Residuals:	117	BIC:	1161.
Df Model:	5

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
constant	-44.6608	6.507	-6.863	0.000	-57.548 -31.774
linear	1.7577	0.067	26.278	0.000	1.625 1.890
nodal cos	3.7984	3.233	1.175	0.242	-2.604 10.201
nodal sin	-11.8578	3.138	-3.778	0.000	-18.073 -5.642
wind cos	1.2649	0.189	6.694	0.000	0.891 1.639
wind sin	-0.4672	0.266	-1.753	0.082	-0.995 0.060

Omnibus:	0.139	Durbin-Watson:	1.435
Prob(Omnibus):	0.933	Jarque-Bera (JB):	0.311
Skew:	0.013	Prob(JB):	0.856
Kurtosis:	2.755	Cond. No.	122.

In [76]:

def plot(degree):
    fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
    %Rpush fitted
    %Rpush degree
    %R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
    l = %Rget l
    import pandas.rpy.common as com
    l = com.load_data('l')
    plt.fill_between(np.arange(1890, 2101), l['fit'] - l['se.fit']*1.96, l['fit'] + l['se.fit']*1.96, alpha=0.5)
    plt.plot(np.arange(1890, 2101), l['fit'])
    plt.ylim(-200,400)
    plt.title('Loess model with degree %d' % degree)
    plt.ylabel("sea surface height residual ['mm']")
interactive(plot, degree=(1,2))

In [77]:

# The broken linear model
y = df['nap']
X = np.c_[
    df['year']-1970, 
    np.cos(2*np.pi*(df['year']-1970)/18.613),
    np.sin(2*np.pi*(df['year']-1970)/18.613),
    df['U2cos'], 
    df['U2sin'],
    (df['year']-1990 > 0)*(df['year']-1990)
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
fit = model.fit()

In [78]:

%Rpush df 
%R fit <- lm(nap ~ year + I((year > 1990)*(year-1990)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
%R years <- seq(1890, 2100)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
pred = %Rget pred
%R print(summary(fit))

Call:
lm(formula = nap ~ year + I((year > 1990) * (year - 1990)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > 1990) * (year - 1990))                       8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > 1990) * (year - 1990))                        1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16

In [79]:

def compute(split):
    %Rpush split
    %Rpush df 
    %R fit <- lm(nap ~  year + I((year > split)*(year-split)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
    %R years <- seq(1890, 2100)
    %R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
    pred = %Rget pred
    %R print(summary(fit))
interactive(compute, split=(1980, 2000))

Call:
lm(formula = nap ~ year + I((year > split) * (year - split)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > split) * (year - split))                     8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > split) * (year - split))                      1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16

In [80]:

plt.fill_between(np.arange(1890, 2101), np.array(pred)[:,1], np.array(pred)[:,2], alpha=0.5)
plt.plot(np.arange(1890, 2101), np.array(pred)[:,0])

Out[80]:

[<matplotlib.lines.Line2D at 0x1140ae490>]

In [81]:

# The linear model
%Rpush df 
%R fit <- lm(nap ~ year + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613)) + U2cos + U2sin, data=df)
%R years <- seq(1890, 2100)
# Padd with means
%R U2cos = df$U2cos
%R U2cos[years>2012] = mean(df$U2cos)
%R U2sin = df$U2cos
%R U2sin[years>2012] = mean(df$U2sin)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=U2cos, U2sin=U2sin))
pred = %Rget pred
%R print(summary(fit))

Call:
lm(formula = nap ~ year + I(cos((2 * pi * year - 1970)/18.613)) + 
    I(sin((2 * pi * year - 1970)/18.613)) + U2cos + U2sin, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-59.919 -17.607   1.216  15.672  59.747 

Coefficients:
                                        Estimate Std. Error t value Pr(>|t|)
(Intercept)                           -3.507e+03  1.289e+02 -27.207  < 2e-16
year                                   1.758e+00  6.689e-02  26.278  < 2e-16
I(cos((2 * pi * year - 1970)/18.613))  3.427e+00  3.233e+00   1.060  0.29120
I(sin((2 * pi * year - 1970)/18.613)) -1.197e+01  3.139e+00  -3.814  0.00022
U2cos                                  1.265e+00  1.890e-01   6.694 7.94e-10
U2sin                                 -4.672e-01  2.665e-01  -1.753  0.08214
                                         
(Intercept)                           ***
year                                  ***
I(cos((2 * pi * year - 1970)/18.613))    
I(sin((2 * pi * year - 1970)/18.613)) ***
U2cos                                 ***
U2sin                                 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.73 on 117 degrees of freedom
Multiple R-squared:  0.8907,	Adjusted R-squared:  0.886 
F-statistic: 190.6 on 5 and 117 DF,  p-value: < 2.2e-16

In [124]:

fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 1
%Rpush fitted
%Rpush degree
%R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
l = %Rget l
import pandas.rpy.common as com
l = com.load_data('l')
ci_loess = pandas.DataFrame(data=dict(year=np.arange(1890, 2101), se=l['se.fit'], fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))

# compute angle
beta = (ci_loess[ci_loess['year'] == 2100].fit.item() - ci_loess[ci_loess['year'] == 2013].fit.item())/(2100-2013)
# compute std error (1.96 -> z 5%, 2 two-tailed)
se = ((ci_loess[ci_loess['year'] == 2100].upr.item() - ci_loess[ci_loess['year'] == 2100].lwr.item()))/1.96/2.0/(2100-2013)
beta, se

Out[124]:

(0.27802282630813135, 0.23436251419913748)

In [114]:

fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 2
%Rpush fitted
%Rpush degree
%R fit <- loess(fitted, degree=degree, control=loess.control(surface='direct'))
%R l <- predict(fit, se=TRUE, newdata=data.frame(year=seq(1890,2100)))

import pandas.rpy.common as com
l = com.load_data('l')
fit = %Rget fit

ci_loess2 = pandas.DataFrame(data=dict(year=np.arange(1890, 2101),  fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))
#ci_loess = ci_loess.set_index('year')

In [84]:

ci_linear = com.load_data("pred")
ci_linear['year'] = np.arange(1890, 2101)
#ci_linear= ci_linear.set_index('year')

In [85]:

ci = pandas.merge(ci_loess, ci_loess2, on='year', suffixes=('_loess', '_loess2'))
ci = pandas.merge(ci_linear, ci, on='year', suffixes=('_linear', ''))
#ci['fit'].name('fit_linear')
#ci['lwr'].rename('lwr_linear')
#ci['upr'].rename('upr_linear')
ci.rename(columns={'fit':'fit_linear', 'lwr': 'lwr_linear', 'upr': 'upr_linear'}, inplace=True)
ci.to_csv('ci.csv')

In [86]:

plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] + ci_loess['lwr'], ci_linear['upr'] + ci_loess['upr'])
plt.plot(np.arange(1890,2101), ci_linear['fit'] + ci_loess['fit'])
plt.plot(np.arange(1890,2101), ci_linear['fit'], color='black')

Out[86]:

[<matplotlib.lines.Line2D at 0x114c1b7d0>]

In [40]:

plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] , ci_linear['upr'] )
plt.plot(np.arange(1890,2101), ci_linear['fit'])
plt.fill_between(np.arange(1890,2101), ci_loess['lwr'] , ci_loess['upr'] )
plt.plot(np.arange(1890,2101), ci_loess['fit'])

Out[40]:

[<matplotlib.lines.Line2D at 0x11390aad0>]

In [40]:

In [41]:

df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
plt.subplots(figsize=(10,6))
for split in np.arange(1990, 2000):
    X = np.c_[
        df['year']-1970, 
        (df['year']-split)*(df['year']>split),
        np.cos(2*np.pi*(df['year']-1970)/18.613),
        np.sin(2*np.pi*(df['year']-1970)/18.613),
        df['U2cos'], 
        df['U2sin']
    ]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    plt.plot(df['year'], results.fittedvalues, label=str(split), alpha=0.3)

In [42]:

# Based on the records of Amsterdam/Den Helder
df = pandas.read_csv('/Users/baart_f/src/oetpython/applications/sealevel//sealevel/static/data/extra/9000.csv')

# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
# compute a trend
def trend(index):
    # Select based on index
    df_selected = df.ix[index]
    # Create a linear model
    y = df_selected['waterlevel']
    X = np.c_[
        df_selected['year.month'],
        np.cos(2*np.pi*(df_selected['year.month']-1970)/18.613),
        np.sin(2*np.pi*(df_selected['year.month']-1970)/18.613)
    ]
    X = statsmodels.regression.linear_model.add_constant(X)
    model= statsmodels.regression.linear_model.OLS(y, X)
    # fit the model
    result = model.fit()
    # ignore if we have missings
    if np.isnan(df_selected['year.month']).any():
        return np.nan
    # create a plot
    ax1.plot(df_selected['year.month'], result.fittedvalues, 'k-', alpha=0.2, linewidth=3)
    # return the trend
    return result.params['x1']
df['waterlevel'][df['waterlevel'] == -99999] = np.nan
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')
df = df[df['year.month']<1890]
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
fig.savefig('amsterdam.pdf')

In [43]:

# standard error of beta_1 and confidence range 
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)]) 

Out[43]:

(0.17157195469765205, 0.67256206241479599)

In [44]:

# the same for the whole of amsterdam
resp = requests.get('http://www.psmsl.org/data/longrecords/amsterdam.sea.level')
f = io.BytesIO(resp.content)
df = pandas.read_fwf(f, widths=(10,20), names=('year.month', 'waterlevel'), skiprows=1)
df['waterlevel']/=10.0
# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')

betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)]) 

Out[44]:

(0.17411785038363373, 0.68254197350384416)

In [44]: