from __future__ import print_function, division %matplotlib inline import numpy as np import matplotlib.pyplot as plt import pandas as pd # use seaborn plotting defaults import seaborn as sns; sns.set() # !curl -O http://academic.udayton.edu/kissock/http/Weather/gsod95-current/NOOSLO.txt # !mv NOOSLO.txt data data = pd.read_csv('../data/NOOSLO.txt', delim_whitespace=True, names=['month', 'day', 'year', 'degF']) data.describe() # Filter bad years data = data[data.year > 200] # Filter missing data data = data[data.degF > -99] data.describe() # Create a date index YMD = 10000 * data.year + 100 * data.month + data.day data.index = pd.to_datetime(YMD, format='%Y%m%d').astype('datetime64[ns]') data.head() # Convert Fahrenheit to Celsius data['degC'] = (data.degF - 32) / 1.8 data.head() data.degC.plot(); data['dayofyear'] = data.index.dayofyear x = data['dayofyear'].values y = data['degC'].values plt.scatter(x, y, alpha=0.1); from scipy import optimize def linear_model(theta, x): return theta[0] + theta[1] * x def linear_cost(theta, x, y): return np.sum((y - linear_model(theta, x)) ** 2) theta_guess = [0, 0] theta_best = optimize.fmin(linear_cost, theta_guess, args=(x, y)) print(theta_best) xfit = np.linspace(0, 365) plt.scatter(x, y, alpha=0.1) plt.plot(xfit, linear_model(theta_best, xfit), '-k'); X = np.vstack([np.ones_like(x), x]).T theta_best = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y)) plt.scatter(x, y, alpha=0.1) plt.plot(xfit, linear_model(theta_best, xfit), '-k'); def seasonal_model(theta, x): phi = 2 * np.pi * x / 365 return theta[0] + theta[1] * np.sin(phi) + theta[2] * np.cos(phi) def seasonal_cost(theta, x, y): return np.sum((y - seasonal_model(theta, x)) ** 2) theta_guess = [0, 0, 0] theta_best = optimize.fmin(seasonal_cost, theta_guess, args=(x, y)) print(theta_best) xfit = np.linspace(0, 365) plt.scatter(x, y, alpha=0.1) plt.plot(xfit, seasonal_model(theta_best, xfit), '-k'); X = np.vstack([np.ones_like(x), np.sin(2 * np.pi * x / 365), np.cos(2 * np.pi * x / 365)]).T theta_best = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y)) plt.scatter(x, y, alpha=0.1) plt.plot(xfit, seasonal_model(theta_best, xfit), '-k'); data['detrended_temp'] = y - seasonal_model(theta_best, x) data.head() data.detrended_temp.plot(); days = data.index.astype(int) y_detrended = data.detrended_temp days = (days - days[0]) / (days[1] - days[0]) plt.scatter(days, y_detrended, alpha=0.1); X = np.vstack([np.ones_like(days), days]).T theta_best = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y_detrended)) print("slope = {0:.2g} degrees per year".format(365 * theta_best[1])) plt.scatter(days, y_detrended, alpha=0.1) plt.plot(days, linear_model(theta_best, days), '-k'); plt.hist(y_detrended, 100); plt.scatter(y_detrended[:-1], y_detrended[1:]); # Define a combined model: offset + seasonal variation + linear trend # (note: leap years might throw this off) def combined_model(theta, days): return (theta[0] + theta[1] * days + theta[2] * np.sin(2 * np.pi * days / 365.) + theta[3] * np.cos(2 * np.pi * days / 365.)) # Solve this model using linear algebra approach X = np.vstack([np.ones_like(days), days, np.sin(2 * np.pi * days / 365.), np.cos(2 * np.pi * days / 365.)]).T theta_best = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y)) # Plot the best model and find the corrected trend: print("slope = {0:.2g} degrees per year".format(365 * theta_best[1])) plt.scatter(days, y, alpha=0.1) plt.plot(np.sort(days), combined_model(theta_best, np.sort(days)), '-k');