In [1]:

import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import logistic
import math
%pylab inline

Populating the interactive namespace from numpy and matplotlib

WARNING: pylab import has clobbered these variables: ['logistic']
`%matplotlib` prevents importing * from pylab and numpy

In [2]:

dob = pd.read_csv('snapshot_data/2014-09-17/property_indexes/dob-index.csv', index_col=0)

In [3]:

dob.fillna(value=0, inplace=True)

In [4]:

dob['total'] = dob.sum(axis=1)

In [23]:

dob.ix[1990:]['total']

Out[23]:

1990    12987
1991    10692
1992     9168
1993     7059
1994     5114
1995     3019
1996     1674
1997      836
1998      471
1999      307
2000      199
2001      122
2002      100
2003       71
2004       57
2005       35
2006       28
2007       24
2008       13
2009       17
2010       23
2011       19
2012       20
2013       32
2014        7
2411        1
2426        2
Name: total, dtype: float64

In [5]:

dob['ratio'] = (dob['total'] - dob['male']) / dob['total']

In [6]:

dob['year'] = dob.index
dob['shift-year'] = dob['year'] - 1800

In [7]:

dob.ix[1800:1980]['ratio'].plot(kind='line')

Out[7]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ef5d10>

In [8]:

logit = sm.Logit(dob.ix[1800:1990]['ratio'], dob.ix[1800:1990]['shift-year'])

In [9]:

result = logit.fit()

Optimization terminated successfully.
         Current function value: 0.344498
         Iterations 5

In [10]:

result.summary()

Out[10]:

Logit Regression Results
Dep. Variable:	ratio	No. Observations:	191
Model:	Logit	Df Residuals:	190
Method:	MLE	Df Model:	0
Date:	Thu, 08 Jan 2015	Pseudo R-squ.:	-0.6871
Time:	15:17:15	Log-Likelihood:	-65.799
converged:	True	LL-Null:	-39.002
		LLR p-value:	1.000

	coef	std err	z	P>\|z\|	[95.0% Conf. Int.]
shift-year	-0.0146	0.002	-7.109	0.000	-0.019 -0.011

In [11]:

result.params[0]

Out[11]:

-0.014622907346901763

In [12]:

result.model

Out[12]:

<statsmodels.discrete.discrete_model.Logit at 0x7f09d06269d0>

In [13]:

def sigmoid(x):
    b0 = 1
    b1 = -result.params[0]
    exponent = (b0 + ((x)*b1))
    return  1 / (1 + math.exp(-1 * exponent) )

def invsigmoid(x):
    return 1 / sigmoid(x)

In [14]:

dob['logistic'] = dob['shift-year'].apply(sigmoid)

In [15]:

dob.ix[1800:1990][['logistic','ratio']].plot()

Out[15]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ec05d0>

In [16]:

5*math.e**2

Out[16]:

36.94528049465325

In [18]:

import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym

"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you. 
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def mypoly(x, a, b, c, d):
    return a*x**3 + b*x**2 +c*x + d

def myexp(x, a, b,c, d):
    return (a**((b*x)+c)) +d

def mypow(x, a,b,c):
    return  ((x)**(b)) +c


"""
make the curve_fit
"""
for func in [mypoly, myexp, mypow]:
    x = list(dob.ix[1800:1980]['ratio'].index)
    y = list(dob.ix[1800:1980]['ratio'])

    
    popt, pcov = curve_fit(func, x, y, maxfev=1000000)
    print 'pcov', pcov

    """
    Plot your data
    """
    plt.plot(x, y, 'ro',label="Original Data")

    """
    brutal force to avoid errors
    """    
    x = [float(xn) for xn in x] #every element (xn) in x becomes a float
    y = [float(yn) for yn in y] #every element (yn) in y becomes a float
    x = np.array(x) #transform your data in a numpy array, 
    y = np.array(y) #so the curve_fit can work



    """
    The result is:
    popt[0] = a , popt[1] = b, popt[2] = c and popt[2] = d of the function,
    so f(x) = popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3].
    """
    print "a = %s , b = %s, c = %s, d = %s" % (popt[0], popt[1], popt[2], popt[3] if len(popt)==4 else None)

    """
    Use sympy to generate the latex sintax of the function
    """
    xs = sym.Symbol('\lambda')    
    tex = sym.latex(func(xs,*popt)).replace('$', '')
    plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)

    """
    Print the coefficients and plot the funcion.
    """

    plt.plot(x, func(x, *popt), label="Fitted Curve") #same as line above \/
    #plt.plot(x, popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3], label="Fitted Curve") 

    plt.legend(loc='upper left')
    plt.show()

pcov [[  2.02482052e-17  -1.14807353e-13   2.16886454e-10  -1.36513136e-07]
 [ -1.14807353e-13   6.51000487e-10  -1.22990766e-06   7.74181850e-04]
 [  2.16886454e-10  -1.22990766e-06   2.32376497e-03  -1.46282103e+00]
 [ -1.36513136e-07   7.74181850e-04  -1.46282103e+00   9.20913276e+02]]
a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013

pcov [[  3.01147547e+04  -2.59627896e+06   5.42091393e+09  -1.76199201e-01]
 [ -2.59627896e+06   2.23832619e+08  -4.67352462e+11   1.51905520e+01]
 [  5.42091393e+09  -4.67352462e+11   9.75810965e+14  -3.17171987e+04]
 [ -1.76199183e-01   1.51905505e+01  -3.17171953e+04   7.07313684e-06]]
a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027

pcov [[ inf  inf  inf]
 [ inf  inf  inf]
 [ inf  inf  inf]]
a = 1.0 , b = 0.260677743372, c = -7.03174236265, d = None

In [ ]:

myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(2034)

In [20]:

def myexp_f( a, b,c, d):
    return lambda x: (a**((b*x)+c)) +d

def mypoly_f(a,b,c,d):
    return lambda x: a*x**3 + b*x**2 +c*x + d


myexp_at_zero = lambda x: abs(myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(x) - 0.5)

mypoly_at_zero = lambda x: abs(mypoly_f(a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013)(x) - 0.5)

from scipy.optimize import minimize

print minimize(myexp_at_zero, (2100))
print minimize(mypoly_at_zero,(2100))

   status: 2
  success: False
     njev: 44
     nfev: 144
 hess_inv: array([[ 643.99976828]])
      fun: 3.212685673048554e-11
        x: array([ 2034.17023164])
  message: 'Desired error not necessarily achieved due to precision loss.'
      jac: array([ 0.00232247])
   status: 2
  success: False
     njev: 49
     nfev: 159
 hess_inv: array([[ 16654.7000672]])
      fun: 7.298694981727749e-11
        x: array([ 2037.01251129])
  message: 'Desired error not necessarily achieved due to precision loss.'
      jac: array([-0.00400543])

In [ ]: