#!/usr/bin/env python
# coding: utf-8

# In[3]:


# Ask IPython to show its help text for the `%matplotlib` magic (the `pinfo`
# line magic applied to '%matplotlib').
# NOTE(review): this looks like leftover interactive exploration; nothing later
# in the visible notebook uses its output.
get_ipython().run_line_magic('pinfo', '%matplotlib')


# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
#%pylab inline
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import json

import quandl
import os

from IPython.display import display

#KEY = '...'
#!mkdir ./.keys
#with open('./.keys/quandl-api-key.json','w') as f:
#    json.dump({'key': KEY}, f)

# Optional JSON file of the form {"key": "<token>"} holding the Quandl API key
# (see the commented-out snippet above for how it is written).
KEYFILE = './.keys/quandl-api-key.json'
if os.path.exists(KEYFILE):
    # Open the very path that was just checked, so the existence test and the
    # read cannot drift apart if KEYFILE is ever changed.
    with open(KEYFILE, 'r') as f:
        quandl_token = json.load(f)['key']

    import functools
    # Every later fetch goes through `_quandl_get`, with the token pre-bound.
    _quandl_get = functools.partial(quandl.get, authtoken=quandl_token)
else:
    # No key file: fall back to calling quandl.get without a token.
    _quandl_get = quandl.get

# Record the library versions this run used.
print(pd.__version__, np.__version__)
fig = plt.figure()


# In[ ]:


# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USARGDPR-Real-GDP-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/GDP-Gross-Domestic-Product-1-Decimal
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/FYGFD-Gross-Federal-Debt
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USAPOPL-Population-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CPIAUCSL-Consumer-Price-Index-for-All-Urban-Consumers-All-Items-USA-Inflation
import collections

# Fetch each dataset listed in the URLs above and store it under its Quandl
# code with '/' replaced by '_' (e.g. 'FRED/GDP' -> 'FRED_GDP'), in this order.
_data = collections.OrderedDict()
for _key in ('FRED/USARGDPR',
             'FRED/GDP',
             'FRED/FYGFD',
             'FRED/USAPOPL',
             'FRED/CPIAUCSL'):
    _data.update({_key.replace('/', '_'): _quandl_get(_key)})


# In[ ]:


# Make wide, short figures the default for every plot drawn after this cell.
mpl.rcParams['figure.figsize'] = (20,4)


# In[ ]:


#for k,v in _data.iteritems():
#    v.plot(
#        title=k,
#        xlim=('1940','2020'),
#        ylim=(0, v.max()),
#        xticks=[str(x) for x in range(1949,2017,4)],
#        #x_compat=True,
#)
#usargdpr.plot(), usagdp.plot(), fygfd.plot(), popl.plot()
#usargdpr.


# In[ ]:


# Preview the first rows of two of the fetched datasets; the tuple is the
# cell's displayed value.
_data['FRED_USARGDPR'].head(), _data['FRED_FYGFD'].head()


# In[ ]:


# Plot the ratio of annual-mean output (USARGDPR, then GDP) to annual-mean
# gross federal debt (FYGFD).
# The `how=` keyword of `resample` is gone from current pandas, so the mean is
# taken on the Resampler object instead.
(_data['FRED_USARGDPR'].resample('A').mean()
 / _data['FRED_FYGFD'].resample('A').mean()).plot(title='USARGDPR / FYGFD')
(_data['FRED_GDP'].resample('A').mean()
 / _data['FRED_FYGFD'].resample('A').mean()).plot(title='GDP / FYGFD')


# In[ ]:


# Plot annual-mean GDP divided by annual-mean population.
# `resample(..., how='mean')` is no longer accepted by pandas; aggregate on the
# Resampler instead.
(_data['FRED_GDP'].resample('A').mean()
 / _data['FRED_USAPOPL'].resample('A').mean()).plot(title='FRED_GDP / FRED_USAPOPL')


# In[ ]:


# Plot annual-mean gross federal debt divided by annual-mean GDP.
# `resample(..., how='mean')` is no longer accepted by pandas; aggregate on the
# Resampler instead.
(_data['FRED_FYGFD'].resample('A').mean()
 / _data['FRED_GDP'].resample('A').mean()).plot(title='FRED_FYGFD / FRED_GDP')


# In[ ]:


# Plot the annual mean of the CPI series, with a tick every fourth year from
# 1949 through 2013. `plot` is kept because the following statements draw the
# year markers onto these axes.
# The mean is taken on the Resampler because `resample` no longer accepts `how=`.
plot = _data['FRED_CPIAUCSL'].resample('A').mean().plot(
    xticks=[str(x) for x in range(1949,2017,4)],
    x_compat=True,
    title="Yearly Inflation (CPI)"
    )
plot.legend(loc='upper left')


def add_line(plot, _year, text=None):
    """Draw a dashed gray vertical marker at ``_year`` and label it.

    Parameters
    ----------
    plot : axes-like object
        Must provide ``yaxis.get_view_interval()``, ``plot()`` and
        ``annotate()``.
    _year : x-coordinate of the marker, in the axes' data coordinates.
    text : label to write next to the marker; when ``None`` the value of
        ``_year`` itself is used as the label.

    The line runs from y=0 up to the upper end of the current y view interval.
    The label is anchored at ``(_year, 0)``, offset by (10, 30) points and
    written vertically.
    """
    label = _year if text is None else text
    y_top = plot.yaxis.get_view_interval()[-1]

    line_style = dict(color='gray', linewidth=1.5, linestyle="--")
    plot.plot((_year, _year), (0, y_top), **line_style)

    label_style = dict(
        xycoords='data',
        xytext=(+10, +30),
        textcoords='offset points',
        fontsize=12,
        rotation='vertical',
        verticalalignment='bottom',
        horizontalalignment='center',
    )
    plot.annotate(label, xy=(_year, 0), **label_style)

# Mark every fourth year from 1949 through 2013 on the CPI plot; with no
# explicit text, each marker is labelled with the year string itself.
for year in range(1949, 2017, 4):
    add_line(plot, str(year))

# Render the annotated axes as this cell's output.
display(plot)


# In[ ]:


us_presidents_csv_url = 'https://commondatastorage.googleapis.com/ckannet-storage/2012-05-08T122246/USPresident-Wikipedia-URLs-Thmbs-HS.csv'


# In[ ]:


def get_presidents_df(data_file='./data/us_presidents.csv', data_url=us_presidents_csv_url):
    """Load the presidents CSV (downloading it on first use) as a DataFrame.

    Parameters
    ----------
    data_file : str
        Local path of the CSV. If it does not exist it is downloaded from
        ``data_url``; an existing file is never overwritten.
    data_url : str
        Where to fetch the CSV from when ``data_file`` is missing.

    Returns
    -------
    pandas.DataFrame
        Columns ``'President '``, ``'Took office '``, ``'Left office '`` (the
        trailing spaces are part of the CSV's header names) plus ``'term'``,
        indexed by ``'Took office '``. Both date columns are parsed to
        datetimes. ``'Left office '`` values that cannot be parsed become
        ``NaT``. ``'term'`` is ``'Left office ' - 'Took office '`` as a
        timedelta.

    Raises
    ------
    ValueError
        If two rows share the same ``'Took office '`` value
        (``verify_integrity=True``).

    Side effects: may create the directory of ``data_file``, may download the
    file, and prints the first row's term.
    """
    from urllib.request import urlretrieve

    datadir = os.path.dirname(data_file)
    if datadir:
        # A bare file name has an empty dirname, which makedirs rejects.
        os.makedirs(datadir, exist_ok=True)

    if not os.path.exists(data_file):
        # Download to a side file first so an interrupted transfer never
        # leaves a truncated CSV that later runs would treat as complete.
        partial_file = data_file + '.part'
        urlretrieve(data_url, partial_file)
        os.replace(partial_file, data_file)

    presidents = pd.read_csv(data_file)

    # Work on a copy so the column assignments below do not write into a
    # slice of `presidents`.
    df = presidents[['President ', 'Took office ', 'Left office ']].copy()
    df['Took office '] = pd.to_datetime(df['Took office '])
    # errors='coerce' replaces the removed `coerce=True` keyword.
    df['Left office '] = pd.to_datetime(df['Left office '], errors='coerce')
    df = df.set_index('Took office ', drop=False, verify_integrity=True)

    # The earlier per-element `astype('datetime64')` conversion of this column
    # is dropped: Timedelta objects have no `astype`, and a duration is not a
    # point in time. `term` stays a timedelta column.
    df['term'] = df['Left office '] - df['Took office ']

    if len(df):
        # Same diagnostic output as before, using explicit positional access.
        print(df['term'].iloc[0])

    #val.item().days
    #df['terms'] = df['term'].apply(
    #    lambda x: (x.item().days if x.item() else 0)
    #    / float(365.25*4))
    return df

#df['terms'] = (df['term'] / np.timedelta64(1, 'D')) / float(365.25*4) # pandas 0.13

# Load the presidents table (downloading the CSV if needed) and preview it.
df = get_presidents_df()
display(df.head())

def presidents_by_year(df=None):
    """Print one ``<year> <name>`` line per row of the presidents table.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame with a ``'President '`` column and a datetime index. When
        omitted, the table is loaded with ``get_presidents_df()``.

    The year comes from each row's index value.
    """
    if df is None:
        df = get_presidents_df()
    # Iterate the column directly: `.ix` no longer exists, and the values that
    # `to_records()` yields for a datetime index are numpy datetime64 scalars,
    # which have no `.year`. `items()` yields pandas Timestamps.
    for took_office, name in df['President '].items():
        print(took_office.year, name)

def add_presidents(plot, presidents=None, yearmin=0):
    """Add one labelled vertical marker per president to ``plot``.

    Parameters
    ----------
    plot : axes object passed through to ``add_line``.
    presidents : pandas.DataFrame, optional
        Frame with a ``'President '`` column and a datetime index. When
        omitted, it is loaded with ``get_presidents_df()``.
    yearmin : int, optional
        Only rows whose index is in or after this year get a marker. The
        default ``0`` (or any falsy value) means no lower bound.

    Each marker is drawn at the row's index value and labelled with the
    president's name.
    """
    if presidents is None:
        presidents = get_presidents_df()

    # Label slicing on dates needs a sorted index. A stable sort keeps the
    # relative order of rows that share a timestamp.
    names = presidents['President '].sort_index(kind='mergesort')

    # `.ix` no longer exists. The old default also sliced from the string '0',
    # which is not a usable date bound, so a falsy yearmin leaves the slice open.
    lower_bound = str(yearmin) if yearmin else None
    for took_office, name in names.loc[lower_bound:].items():
        add_line(plot, took_office, name)

def poli_plot(df, *, yearmax=2017, **kwargs):
    """Plot ``df`` with four-year ticks and a marker for each president.

    Parameters
    ----------
    df : object with a datetime-like index and a pandas-style ``plot`` method.
    yearmax : int, keyword-only
        Year of the last tick. Ticks are placed every four years counting
        down from ``yearmax`` and stopping before the first year in
        ``df.index``. Defaults to 2017, the value that used to be hard-coded.
    **kwargs
        Forwarded unchanged to ``df.plot`` (for example ``title=...``).

    Returns
    -------
    The axes object returned by ``df.plot``, after a legend is placed in the
    upper left and president markers are added from the first year of the
    data onward.
    """
    yearmin = df.index.min().year

    plot = df.plot(
        xticks=[str(x) for x in range(yearmax, yearmin, -4)],
        x_compat=True,
        **kwargs)
    plot.legend(loc='upper left')

    add_presidents(plot, yearmin=yearmin)
    return plot


# In[ ]:


# Plot the raw (un-resampled) CPI series with president markers.
# Note that this rebinds the notebook-global `df`.
df = _data['FRED_CPIAUCSL']
poli_plot(df)


# In[ ]:


# Annual-mean GDP divided by annual-mean population, plotted with president
# markers. The mean is taken on the Resampler because `resample` no longer
# accepts `how=`.
df = (_data['FRED_GDP'].resample('A').mean() / _data['FRED_USAPOPL'].resample('A').mean())
poli_plot(df, title="GDP per capita (thousands of dollars)")


# In[ ]:


# Annual-mean gross federal debt divided by annual-mean population, plotted
# with president markers. The mean is taken on the Resampler because
# `resample` no longer accepts `how=`.
df = (_data['FRED_FYGFD'].resample('A').mean() / _data['FRED_USAPOPL'].resample('A').mean())
poli_plot(df, title="Federal debt per capita (thousands of dollars)")


# In[ ]:


# CPI as a fraction of its maximum, averaged per year. Its reciprocal (`cpi`)
# is the factor that rescales a year's value to the price level at which CPI
# peaks. The mean is taken on the Resampler because `resample` no longer
# accepts `how=`.
inflation_factor_linear = (_data['FRED_CPIAUCSL'] / _data['FRED_CPIAUCSL'].max()).resample('A').mean()
inflation_factor_uhh = 1 / inflation_factor_linear
cpi = inflation_factor_uhh
#display( cpi.head())
#display( cpi.tail())


# Annual-mean federal debt per head of population.
df = (_data['FRED_FYGFD'].resample('A').mean() / _data['FRED_USAPOPL'].resample('A').mean())
#print( df.columns )

# NOTE(review): the product aligns on index and column labels, so this
# assumes both frames use the same column name -- confirm against the data.
scaled = (df * cpi)
#display(scaled.tail())
display(poli_plot(_data['FRED_CPIAUCSL'], title="inflation (FRED CPI UCSL)"))
# The old call passed 'mean' as a second positional argument to `resample`,
# which is not where an aggregation name goes; aggregate explicitly instead.
plot = poli_plot(_data['FRED_CPIAUCSL'].resample('A').mean().pct_change(), title='yearly % change in inflation')
plot.axhline()

display(plot)


display(poli_plot(df, title="debt-per-capita"))
display(poli_plot(scaled, title="debt-per-capita scaled for inflation"))
plot = poli_plot(scaled.pct_change(), title="Yearly % change in debt-per-capita scaled for inflation")
plot.axhline()
display(plot)


#poli_plot(df)
#poli_plot(df * inflation_factor_linear)


# In[ ]:


# In[ ]:


# TODO: add house/senate majority party
# TODO: add major wars


# In[ ]:


# Download the historical and current legislator YAML files into ./data using
# wget with --continue --no-clobber.
# NOTE(review): assumes ./data already exists (get_presidents_df creates the
# directory of its default data file) and that wget is installed.
get_ipython().system('wget --continue --no-clobber https://github.com/unitedstates/congress-legislators/raw/master/legislators-historical.yaml -O ./data/legislators-historical.yaml')
get_ipython().system('wget --continue --no-clobber https://github.com/unitedstates/congress-legislators/raw/master/legislators-current.yaml -O ./data/legislators-current.yaml')
#import yaml
#data = None
#with open('./data/legislators-historical.yaml','rb') as f:
#    data = yaml.load(f)
# List the data directory so the downloaded files show up in the cell output.
get_ipython().system('ls ./data')


# In[ ]:


import yaml
def iter_members(
    data_files=['./data/legislators-historical.yaml',
                './data/legislators-current.yaml']):
    """Yield one tuple per term served, across all the given YAML files.

    Parameters
    ----------
    data_files : iterable of str
        Paths of YAML files, each holding a list of member mappings with a
        ``'name'`` mapping (``'first'``, ``'last'``), a ``'terms'`` list and
        an optional ``'bio'`` mapping.

    Yields
    ------
    tuple
        ``(state, type, start, end, party, first, last, gender, birthday)``,
        in the same order as ``iter_members.columns``. ``party`` and
        ``birthday`` are ``None`` when absent. ``gender`` falls back to
        ``'M'`` when the record has none.

    Raises
    ------
    KeyError
        If a member lacks ``'name'``/``'terms'`` or a term lacks one of
        ``'state'``, ``'type'``, ``'start'``, ``'end'``.
    """
    for data_file in data_files:
        with open(data_file, 'rb') as f:
            # safe_load builds plain Python objects only. A bare `yaml.load(f)`
            # needs an explicit Loader on current PyYAML and can construct
            # arbitrary objects from downloaded input.
            data = yaml.safe_load(f)
        # An empty document loads as None; treat it as "no members".
        for m in data or ():
            # `bio` may be missing or explicitly null.
            bio = m.get('bio') or {}
            for t in m['terms']:
                yield (
                    t['state'],
                    t['type'],
                    t['start'],
                    t['end'],
                    t.get('party'),
                    m['name']['first'],
                    m['name']['last'],
                    # NOTE(review): a missing gender is recorded as 'M';
                    # confirm that this default is intended.
                    bio.get('gender', 'M'),
                    bio.get('birthday')
                )
# Column names for the tuples yielded by iter_members, in the same order as
# the fields of each tuple. Stored as an attribute on the function so the
# DataFrame construction below can reuse it.
iter_members.columns = [
    'state',
    'type',
    'start',
    'end',
    'party',
    'first',
    'last',
    'gender',
    'birthday'
]

# Materialise every term from the default data files into a list.
_legislator_data = list(iter_members())


# In[ ]:


# Build the per-term legislator frame, parse its three date columns and index
# it by term start. The 'start' column is kept alongside the index.
df = pd.DataFrame.from_records(_legislator_data, columns=iter_members.columns)
df = df.assign(
    start=pd.to_datetime(df['start']),
    end=pd.to_datetime(df['end']),
    birthday=pd.to_datetime(df['birthday']),
)
df = df.set_index('start', drop=False)
display(df.head())
# Renders the full frame as well as the preview above.
display(df)


# In[ ]:


col = df['party']
uniques = dict.fromkeys(col.unique())
#print(uniques)

# Sort every distinct party label into one of three buckets and record the
# label -> bucket-name mapping in `_party_map`. The 'Republ' check runs before
# the 'Democr' check, so a label containing both counts as Republican.
# A missing party (None) goes to 'Other'.
_party_map = {}
repub, democ, other = [], [], []
_party_rules = (
    ('Republ', 'Republican', repub),
    ('Democr', 'Democrat', democ),
)
for x in uniques:
    _bucket_name, _bucket = 'Other', other
    if x is not None:
        for _fragment, _rule_name, _rule_bucket in _party_rules:
            if _fragment in x:
                _bucket_name, _bucket = _rule_name, _rule_bucket
                break
    _bucket.append(x)
    _party_map[x] = _bucket_name


print(len(repub), repub)
print(len(democ), democ)
print(len(other), other)


# In[ ]:


# Party counts for terms whose start (the index) falls in or after 1949,
# followed by every row whose party is exactly 'Liberal'.
display(df['1949':]['party'].value_counts())
display(df[df['party']=='Liberal'])


# In[ ]:


# print(df2[df2['state'] == 'NE'][['type', 'party','first','last']].sort().to_string())


# In[ ]:


# For rows with state 'NE': party shares (normalised counts), then raw counts
# per value of the 'type' column.
print('# party')
print(df[df['state'] == 'NE']['party'].value_counts(normalize=True))
print('# type')
print(df[df['state'] == 'NE']['type'].value_counts())


# In[ ]:


# Collapse each party label to its bucket name from `_party_map` (labels not
# in the map become None), then show the bucket counts first as raw numbers
# and then as fractions.
df['two_party_fail'] = df['party'].apply(_party_map.get)
display(df['two_party_fail'].value_counts(normalize=False))
display(df['two_party_fail'].value_counts(normalize=True))


# In[ ]:


# objective: draw chart with per-year, per-two-party-counts
# group by year
# count factors

def start_year(x):
    """Return ``x.year`` rounded down to an even year.

    ``x`` is any object with an integer ``year`` attribute. An even year is
    returned unchanged and an odd year maps to the year before it.
    """
    year = x.year
    return year if year % 2 == 0 else year - 1

# Group by (start_year applied to each index value, party bucket). The frame's
# index is the term start date, so start_year receives those dates.
grouper = df.groupby([start_year, 'two_party_fail'])

# Count rows per group, then pivot the party bucket into columns.
# NOTE(review): this aggregates the grouping column itself with `len`; confirm
# it still behaves as intended on the pandas version in use.
whoa = grouper.aggregate({'two_party_fail':len}).unstack()

display(whoa.plot())
display(whoa.head())
display(whoa.tail())

#df.pivot_table(values='two_party_fail', cols=['start'], aggfunc=len)


# In[ ]:


# Same counting as the party chart, keyed by the 'gender' column instead:
# group by (start_year of the index value, gender), count rows per group,
# pivot gender into columns and plot.
# Note that this rebinds the notebook-globals `grouper` and `whoa`.
grouper = df.groupby([start_year, 'gender'])

whoa = grouper.aggregate({'gender':len}).unstack()
display(whoa.plot())


# In[2]:


# Q. how are these misleading / maybe not as helpful as they could be?
# 1. they count by start year, so they don't show the state at any given time
#    to show the state at any given time would require
#    a 'currently_serving' function
#    which, one might think could take into account standard terms/elections
#    as appropriate for rep/sen,
#    but there are special cases in mid-stream

# 2. they do not stratify by rep/sen; the counts are lumped together
#    'share_y' split by 'type' might be helpful

# ... how many hours would it take to draw these in [spreadsheet tool]
#     only to realize that you have no idea what 
#     'settings' were used to create a (very beautiful) chart?
#     ... python tools for visual studio now support 
#         something like `ipython --pylab=inline/qt`
#         ... i work on various platforms, so that's not an option for me
#         ... not sure what sort of configuration is required to get
#             anaconda ce working with this ide
#     ... ipython qt, ipython notebook
#     ... spyder ide
#     ... you can run these as scheduled jobs which generate online charts,
#         but then, still, without the source,
#         what smoke are you
# ... "you can get a good look at a t-bone steak by"