import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import logistic
import math
%pylab inline
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['logistic'] `%matplotlib` prevents importing * from pylab and numpy
dob = pd.read_csv('snapshot_data/2014-09-17/property_indexes/dob-index.csv', index_col=0)
dob.fillna(value=0, inplace=True)
dob['total'] = dob.sum(axis=1)
dob.ix[1990:]['total']
1990 12987 1991 10692 1992 9168 1993 7059 1994 5114 1995 3019 1996 1674 1997 836 1998 471 1999 307 2000 199 2001 122 2002 100 2003 71 2004 57 2005 35 2006 28 2007 24 2008 13 2009 17 2010 23 2011 19 2012 20 2013 32 2014 7 2411 1 2426 2 Name: total, dtype: float64
dob['ratio'] = (dob['total'] - dob['male']) / dob['total']
dob['year'] = dob.index
dob['shift-year'] = dob['year'] - 1800
dob.ix[1800:1980]['ratio'].plot(kind='line')
<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ef5d10>
logit = sm.Logit(dob.ix[1800:1990]['ratio'], dob.ix[1800:1990]['shift-year'])
result = logit.fit()
Optimization terminated successfully. Current function value: 0.344498 Iterations 5
result.summary()
Dep. Variable: | ratio | No. Observations: | 191 |
---|---|---|---|
Model: | Logit | Df Residuals: | 190 |
Method: | MLE | Df Model: | 0 |
Date: | Thu, 08 Jan 2015 | Pseudo R-squ.: | -0.6871 |
Time: | 15:17:15 | Log-Likelihood: | -65.799 |
converged: | True | LL-Null: | -39.002 |
LLR p-value: | 1.000 |
coef | std err | z | P>|z| | [95.0% Conf. Int.] | |
---|---|---|---|---|---|
shift-year | -0.0146 | 0.002 | -7.109 | 0.000 | -0.019 -0.011 |
result.params[0]
-0.014622907346901763
result.model
<statsmodels.discrete.discrete_model.Logit at 0x7f09d06269d0>
def sigmoid(x):
b0 = 1
b1 = -result.params[0]
exponent = (b0 + ((x)*b1))
return 1 / (1 + math.exp(-1 * exponent) )
def invsigmoid(x):
return 1 / sigmoid(x)
dob['logistic'] = dob['shift-year'].apply(sigmoid)
dob.ix[1800:1990][['logistic','ratio']].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ec05d0>
5*math.e**2
36.94528049465325
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym
"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you.
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def mypoly(x, a, b, c, d):
return a*x**3 + b*x**2 +c*x + d
def myexp(x, a, b,c, d):
return (a**((b*x)+c)) +d
def mypow(x, a,b,c):
return ((x)**(b)) +c
"""
make the curve_fit
"""
for func in [mypoly, myexp, mypow]:
x = list(dob.ix[1800:1980]['ratio'].index)
y = list(dob.ix[1800:1980]['ratio'])
popt, pcov = curve_fit(func, x, y, maxfev=1000000)
print 'pcov', pcov
"""
Plot your data
"""
plt.plot(x, y, 'ro',label="Original Data")
"""
brutal force to avoid errors
"""
x = [float(xn) for xn in x] #every element (xn) in x becomes a float
y = [float(yn) for yn in y] #every element (yn) in y becomes a float
x = np.array(x) #transform your data in a numpy array,
y = np.array(y) #so the curve_fit can work
"""
The result is:
popt[0] = a , popt[1] = b, popt[2] = c and popt[2] = d of the function,
so f(x) = popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3].
"""
print "a = %s , b = %s, c = %s, d = %s" % (popt[0], popt[1], popt[2], popt[3] if len(popt)==4 else None)
"""
Use sympy to generate the latex sintax of the function
"""
xs = sym.Symbol('\lambda')
tex = sym.latex(func(xs,*popt)).replace('$', '')
plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)
"""
Print the coefficients and plot the funcion.
"""
plt.plot(x, func(x, *popt), label="Fitted Curve") #same as line above \/
#plt.plot(x, popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3], label="Fitted Curve")
plt.legend(loc='upper left')
plt.show()
pcov [[ 2.02482052e-17 -1.14807353e-13 2.16886454e-10 -1.36513136e-07] [ -1.14807353e-13 6.51000487e-10 -1.22990766e-06 7.74181850e-04] [ 2.16886454e-10 -1.22990766e-06 2.32376497e-03 -1.46282103e+00] [ -1.36513136e-07 7.74181850e-04 -1.46282103e+00 9.20913276e+02]] a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013
pcov [[ 3.01147547e+04 -2.59627896e+06 5.42091393e+09 -1.76199201e-01] [ -2.59627896e+06 2.23832619e+08 -4.67352462e+11 1.51905520e+01] [ 5.42091393e+09 -4.67352462e+11 9.75810965e+14 -3.17171987e+04] [ -1.76199183e-01 1.51905505e+01 -3.17171953e+04 7.07313684e-06]] a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027
pcov [[ inf inf inf] [ inf inf inf] [ inf inf inf]] a = 1.0 , b = 0.260677743372, c = -7.03174236265, d = None
myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(2034)
def myexp_f( a, b,c, d):
return lambda x: (a**((b*x)+c)) +d
def mypoly_f(a,b,c,d):
return lambda x: a*x**3 + b*x**2 +c*x + d
myexp_at_zero = lambda x: abs(myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(x) - 0.5)
mypoly_at_zero = lambda x: abs(mypoly_f(a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013)(x) - 0.5)
from scipy.optimize import minimize
print minimize(myexp_at_zero, (2100))
print minimize(mypoly_at_zero,(2100))
status: 2 success: False njev: 44 nfev: 144 hess_inv: array([[ 643.99976828]]) fun: 3.212685673048554e-11 x: array([ 2034.17023164]) message: 'Desired error not necessarily achieved due to precision loss.' jac: array([ 0.00232247]) status: 2 success: False njev: 49 nfev: 159 hess_inv: array([[ 16654.7000672]]) fun: 7.298694981727749e-11 x: array([ 2037.01251129]) message: 'Desired error not necessarily achieved due to precision loss.' jac: array([-0.00400543])