Notebook

In [3]:

%matplotlib?

In [1]:

%matplotlib inline
#%pylab inline
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import json

import quandl
import os

from IPython.display import display

#KEY = '...'
#!mkdir ./.keys
#with open('./.keys/quandl-api-key.json','w') as f:
#    json.dump({'key': KEY}, f)

# Optional JSON file holding the Quandl API key as {"key": "<token>"}.
KEYFILE = './.keys/quandl-api-key.json'

if os.path.exists(KEYFILE):
    import functools

    # Read from KEYFILE itself so the existence check and the read can never
    # point at two different paths.
    with open(KEYFILE, 'r') as f:
        quandl_token = json.load(f)['key']

    # Pre-bind the token: every later _quandl_get(code) call is authenticated.
    _quandl_get = functools.partial(quandl.get, authtoken=quandl_token)
else:
    # No key file present: fall back to plain, unauthenticated quandl.get.
    _quandl_get = quandl.get
    
print(pd.__version__, np.__version__)
fig = plt.figure()

Reloading matplotlib.backends.__future__
Reloading __future__
Reloading matplotlib.backends.matplotlib
Reloading matplotlib
Reloading distutils
Reloading distutils.version
Reloading distutils.string
Reloading string
Reloading re
Reloading _sre
Reloading sre_compile
Reloading sre_parse
Reloading sre_constants
Reloading copy_reg
Reloading types
Reloading strop
Reloading distutils.re
Reloading distutils.types
Reloading dateutil
Reloading pyparsing
Reloading weakref
Reloading UserDict
Reloading _abcoll
Reloading abc
Reloading _weakrefset
Reloading _weakref
Reloading exceptions
Reloading copy
Reloading org
Reloading warnings
Reloading linecache
Reloading os
Reloading errno
Reloading posix
Reloading posixpath
Reloading stat
Reloading genericpath
Reloading _warnings
Reloading collections
Reloading _collections
Reloading operator
Reloading keyword
Reloading heapq
Reloading itertools
Reloading _heapq
Reloading thread
Reloading shutil
Reloading fnmatch
Reloading pwd
Reloading grp
Reloading distutils.sysconfig
Reloading distutils.os
Reloading distutils.sys
Reloading distutils.distutils
Reloading distutils.errors
Reloading matplotlib.cbook
Reloading matplotlib.__future__
Reloading matplotlib.datetime
Reloading datetime
Reloading matplotlib.errno
Reloading matplotlib.functools
Reloading functools
Reloading _functools
Reloading matplotlib.glob
Reloading glob
Reloading matplotlib.gzip
Reloading gzip
Reloading struct
Reloading _struct
Reloading time
Reloading zlib
Reloading io
Reloading _io
Reloading matplotlib.io
Reloading matplotlib.locale
Reloading locale
Reloading encodings
Reloading encodings.codecs
Reloading codecs
Reloading _codecs
Reloading encodings.encodings
Reloading encodings.__builtin__
Reloading encodings.aliases
Reloading _locale
Reloading matplotlib.os
Reloading matplotlib.re
Reloading matplotlib.sys
Reloading matplotlib.threading
Reloading threading
Reloading traceback
Reloading matplotlib.time
Reloading matplotlib.traceback
Reloading matplotlib.types
Reloading matplotlib.warnings
Reloading matplotlib.weakref
Reloading matplotlib.numpy
Reloading numpy
Reloading numpy.__config__
Reloading numpy.version
Reloading numpy._import_tools
Reloading numpy.testing
Reloading unittest
Reloading unittest.result
Reloading unittest.os
Reloading unittest.sys
Reloading unittest.traceback
Reloading unittest.StringIO
Reloading StringIO
Reloading unittest.functools
Reloading unittest.case
Reloading unittest.collections
Reloading unittest.difflib
Reloading difflib
Reloading unittest.pprint
Reloading pprint
Reloading cStringIO
Reloading unittest.re
Reloading unittest.types
Reloading unittest.warnings
Reloading unittest.util
Reloading unittest.suite
Reloading unittest.loader
Reloading unittest.fnmatch
Reloading unittest.main
Reloading unittest.signals
Reloading unittest.signal
Reloading signal
Reloading unittest.weakref
Reloading unittest.runner
Reloading unittest.time
Reloading numpy.testing.utils
Reloading numpy.testing.nosetester
Reloading numpy.compat
Reloading numpy.compat._inspect
Reloading numpy.compat.py3k
Reloading numpy.core
Reloading numpy.core.info
Reloading numpy.core.numeric
Reloading numpy.core.umath
Reloading numpy.core.numerictypes
Reloading numpy.core.multiarray
Reloading cPickle
Reloading numpy.core._dotblas
Reloading numpy.core.arrayprint
Reloading numpy.core.fromnumeric
Reloading numpy.core.records
Reloading numpy.core.defchararray
Reloading numpy.core.memmap
Reloading numpy.core.function_base
Reloading numpy.core.machar
Reloading numpy.core.getlimits
Reloading numpy.core.shape_base
Reloading numpy.testing.numpytest
Reloading numpy.lib
Reloading math
Reloading numpy.lib.info
Reloading numpy.lib.type_check
Reloading numpy.lib.ufunclike
Reloading numpy.lib.index_tricks
Reloading numpy.matrixlib
Reloading numpy.matrixlib.defmatrix
Reloading numpy.lib.function_base
Reloading numpy.lib.twodim_base
Reloading numpy.lib._compiled_base
Reloading numpy.lib.utils
Reloading numpy.lib.stride_tricks
Reloading numpy.lib.nanfunctions
Reloading numpy.lib.shape_base
Reloading numpy.lib.polynomial
Reloading numpy.linalg
Reloading numpy.linalg.info
Reloading numpy.linalg.linalg
Reloading numpy.lib.arraysetops
Reloading numpy.lib.npyio
Reloading numpy.lib._datasource
Reloading numpy.lib._iotools
Reloading future_builtins
Reloading numpy.lib.financial
Reloading numpy.lib.arrayterator
Reloading numpy.lib.arraypad
Reloading numpy.ma
Reloading numpy.ma.core
Reloading numpy.ma.extras
Reloading matplotlib.compat
Reloading tempfile
Reloading random
Reloading binascii
Reloading hashlib
Reloading _hashlib
Reloading _random
Reloading fcntl
Reloading matplotlib.rcsetup
Reloading matplotlib.matplotlib
Reloading matplotlib.fontconfig_pattern
Reloading matplotlib.pyparsing
Reloading matplotlib.colors
Reloading matplotlib.textwrap
Reloading textwrap
Reloading matplotlib.backends.inspect
Reloading inspect
Reloading dis
Reloading opcode
Reloading imp
Reloading tokenize
Reloading token
Reloading matplotlib.backends.warnings
('0.14.1', '1.8.2')

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-56193167c6ba> in <module>()

/usr/local/lib/python2.7/site-packages/matplotlib/pyplot.pyc in figure(num, figsize, dpi, facecolor, edgecolor, frameon, FigureClass, **kwargs)
    421                                         frameon=frameon,
    422                                         FigureClass=FigureClass,
--> 423                                         **kwargs)
    424 
    425         if figLabel:

/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_macosx.pyc in new_figure_manager(num, *args, **kwargs)
    235     """
    236     FigureClass = kwargs.pop('FigureClass', Figure)
--> 237     figure = FigureClass(*args, **kwargs)
    238     return new_figure_manager_given_figure(num, figure)
    239 

/usr/local/lib/python2.7/site-packages/matplotlib/figure.pyc in __init__(self, figsize, dpi, facecolor, edgecolor, linewidth, frameon, subplotpars, tight_layout)
    309             frameon = rcParams['figure.frameon']
    310 
--> 311         self.dpi_scale_trans = Affine2D()
    312         self.dpi = dpi
    313         self.bbox_inches = Bbox.from_bounds(0, 0, *figsize)

/usr/local/lib/python2.7/site-packages/matplotlib/transforms.pyc in __init__(self, matrix, **kwargs)
   1699         If *matrix* is None, initialize with the identity transform.
   1700         """
-> 1701         Affine2DBase.__init__(self, **kwargs)
   1702         if matrix is None:
   1703             matrix = np.identity(3)

/usr/local/lib/python2.7/site-packages/matplotlib/transforms.pyc in __init__(self, *args, **kwargs)
   1544 
   1545     def __init__(self, *args, **kwargs):
-> 1546         Transform.__init__(self, *args, **kwargs)
   1547         self._inverted = None
   1548 

/usr/local/lib/python2.7/site-packages/matplotlib/transforms.pyc in __init__(self, shorthand_name)
     91         # parents are deleted, references from the children won't keep
     92         # them alive.
---> 93         self._parents = WeakValueDictionary()
     94 
     95         # TransformNodes start out as invalid until their values are

/usr/local/Cellar/python/2.7.5/Frameworks/Python.framework/Versions/2.7/lib/python2.7/weakref.pyc in __init__(self, *args, **kw)
     51                 del self.data[wr.key]
     52         self._remove = remove
---> 53         UserDict.UserDict.__init__(self, *args, **kw)
     54 
     55     def __getitem__(self, key):

TypeError: unbound method __init__() must be called with UserDict instance as first argument (got WeakValueDictionary instance instead)

In [ ]:

# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USARGDPR-Real-GDP-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/GDP-Gross-Domestic-Product-1-Decimal
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/FYGFD-Gross-Federal-Debt
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/USAPOPL-Population-in-the-United-States
# http://www.quandl.com/FRED-Federal-Reserve-Economic-Data/CPIAUCSL-Consumer-Price-Index-for-All-Urban-Consumers-All-Items-USA-Inflation
import collections

# Fetch every series once and keep them in request order; the dict key is the
# Quandl code with '/' swapped for '_' (e.g. 'FRED/GDP' -> 'FRED_GDP').
_data = collections.OrderedDict()
for _key in ('FRED/USARGDPR', 'FRED/GDP', 'FRED/FYGFD', 'FRED/USAPOPL', 'FRED/CPIAUCSL'):
    _data['_'.join(_key.split('/'))] = _quandl_get(_key)

In [ ]:

mpl.rcParams['figure.figsize'] = (20,4)

In [ ]:

#for k,v in _data.iteritems():
#    v.plot(
#        title=k,
#        xlim=('1940','2020'),
#        ylim=(0, v.max()),
#        xticks=[str(x) for x in range(1949,2017,4)],
#        #x_compat=True,
#)
#usargdpr.plot(), usagdp.plot(), fygfd.plot(), popl.plot()
#usargdpr.

In [ ]:

_data['FRED_USARGDPR'].head(), _data['FRED_FYGFD'].head()

In [ ]:

# Ratio of real GDP (USARGDPR) to gross federal debt (FYGFD); both series are
# resampled to annual ('A') means before dividing so their dates line up.
((_data['FRED_USARGDPR'].resample('A', how='mean'))
 / _data['FRED_FYGFD'].resample('A', how='mean')).plot(title='USARGDPR / FYGFD')
# Same ratio using the GDP series instead of USARGDPR.
((_data['FRED_GDP'].resample('A', how='mean'))
 / _data['FRED_FYGFD'].resample('A', how='mean')).plot(title='GDP / FYGFD')

In [ ]:

# GDP divided by population (USAPOPL), each as an annual ('A') mean.
((_data['FRED_GDP'].resample('A', how='mean'))
 / _data['FRED_USAPOPL'].resample('A', how='mean')).plot(title='FRED_GDP / FRED_USAPOPL')

In [ ]:

# Gross federal debt (FYGFD) divided by GDP, each as an annual ('A') mean.
((_data['FRED_FYGFD'].resample('A', how='mean'))
 / _data['FRED_GDP'].resample('A', how='mean')).plot(title='FRED_FYGFD / FRED_GDP')

In [ ]:

# Plot the annual mean of the CPI series with one x tick every 4 years from
# 1949 up to (not including) 2017; `plot` is the object returned by .plot()
# and is reused below to add the vertical year markers.
plot = _data['FRED_CPIAUCSL'].resample('A', how='mean').plot(
    xticks=[str(x) for x in range(1949,2017,4)],
    x_compat=True,
    title="Yearly Inflation (CPI)"
    )
plot.legend(loc='upper left')


def add_line(plot, _year, text=None):
    """Mark ``_year`` on ``plot`` with a dashed vertical line and a label.

    :param plot: object exposing ``yaxis.get_view_interval()``, ``plot()``
        and ``annotate()`` (the axes returned by a pandas ``.plot()`` call
        in this notebook).
    :param _year: x position of the marker; also used as the label when
        ``text`` is None.
    :param text: optional label shown instead of ``_year``.
    """
    label = _year if text is None else text

    # The line runs from y=0 to the current upper end of the y view interval.
    top = plot.yaxis.get_view_interval()[-1]
    plot.plot((_year, _year), (0, top),
              linestyle="--", linewidth=1.5, color='gray')

    # Vertical label anchored at (x=_year, y=0), nudged by a fixed point offset.
    annotation_style = dict(
        xy=(_year, 0),
        xycoords='data',
        xytext=(+10, +30),
        textcoords='offset points',
        fontsize=12,
        rotation='vertical',
        verticalalignment='bottom',
        horizontalalignment='center',
    )
    plot.annotate(label, **annotation_style)

# One dashed marker every 4 years from 1949 up to (not including) 2017,
# matching the xticks used when `plot` was created.
for year in range(1949, 2017, 4):
    add_line(plot, str(year))

display(plot)

In [ ]:

us_presidents_csv_url = 'https://commondatastorage.googleapis.com/ckannet-storage/2012-05-08T122246/USPresident-Wikipedia-URLs-Thmbs-HS.csv' 

In [ ]:

def get_presidents_df(data_file='./data/us_presidents.csv', data_url=us_presidents_csv_url):
    datadir = os.path.dirname(data_file)
    os.path.exists(datadir) or os.makedirs(datadir)
    !wget --continue --no-clobber $data_url -O $data_file
    df = presidents = pd.read_csv(data_file)
    
    df = presidents[['President ','Took office ','Left office ']]
    df['Took office '] = pd.to_datetime(presidents['Took office '])
    df['Left office '] = pd.to_datetime(presidents['Left office '], coerce=True)
    #display(df)
    df = df.set_index('Took office ', drop=False, verify_integrity=True)
    df['term'] = df['Left office '] - df['Took office ']
    
    col = df['term']
    val = col[0]
    df['term'] = (
        col.apply(
            lambda x: x.astype('datetime64'),
            convert_dtype=False))
    col = df['term']
    val = col[0]
    print(val)
    
    #val.item().days
    #df['terms'] = df['term'].apply(
    #    lambda x: (x.item().days if x.item() else 0)
    #    / float(365.25*4))
    return df

#df['terms'] = (df['term'] / np.timedelta64(1, 'D')) / float(365.25*4) # pandas 0.13

df = get_presidents_df()
display(df.head())

def presidents_by_year(df=None):
    """Print the year each president took office next to the president's name.

    :param df: presidents frame whose index holds the took-office timestamps
        and which has a 'President ' column; loaded with
        ``get_presidents_df()`` when None.
    """
    if df is None:
        df = get_presidents_df()
    # Iterate the index and the name column directly: index items are
    # timestamps with a .year attribute, which to_records() does not
    # guarantee, and this avoids the deprecated .ix indexer.
    for took_office, name in zip(df.index, df['President ']):
        print(took_office.year, name)

def add_presidents(plot, presidents=None, yearmin=0):
    """Add one labelled vertical marker per president to ``plot``.

    :param plot: axes-like object passed through to ``add_line``.
    :param presidents: frame indexed by took-office timestamp with a
        'President ' column; loaded with ``get_presidents_df()`` when None.
    :param yearmin: first year to include. A falsy value (the default 0)
        means "no lower bound".
    """
    if presidents is None:
        presidents = get_presidents_df()

    # Only slice when a real lower bound was given: str(0) == '0' is not a
    # meaningful date label for the datetime index.
    if yearmin:
        presidents = presidents.loc[str(yearmin):]

    # Index items are timestamps; pair them with the names directly instead of
    # going through the deprecated .ix indexer and to_records().
    for took_office, name in zip(presidents.index, presidents['President ']):
        add_line(plot, took_office, name)

def poli_plot(df, yearmax=2017, presidents=None, **kwargs):
    """Plot ``df`` with 4-year x ticks and a marker for every president.

    :param df: frame/series with a datetime index; plotted via ``df.plot``.
    :param yearmax: year of the last x tick; ticks step back 4 years at a
        time down to (not including) the first year in ``df``.
    :param presidents: optional pre-loaded presidents frame forwarded to
        ``add_presidents`` so the table is not re-loaded for every plot;
        None keeps the original behaviour of loading it on each call.
    :param kwargs: forwarded unchanged to ``df.plot`` (e.g. ``title``).
    :returns: the object returned by ``df.plot``.
    """
    yearmin = df.index.min().year

    plot = df.plot(
        xticks=[str(x) for x in range(yearmax, yearmin, -4)],
        x_compat=True,
        **kwargs)
    plot.legend(loc='upper left')

    add_presidents(plot, presidents=presidents, yearmin=yearmin)
    return plot

In [ ]:

# Raw CPI series (not resampled) with the presidential markers overlaid.
df = _data['FRED_CPIAUCSL']
poli_plot(df)

In [ ]:

# Annual-mean GDP divided by annual-mean population.
# NOTE(review): the unit stated in the title depends on the units of the two
# FRED series, which are not visible here -- confirm.
df = ((_data['FRED_GDP'].resample('A', how='mean')) / _data['FRED_USAPOPL'].resample('A', how='mean'))
poli_plot(df, title="GDP per capita (thousands of dollars)")

In [ ]:

# Annual-mean gross federal debt divided by annual-mean population.
# NOTE(review): the unit stated in the title depends on the units of the two
# FRED series, which are not visible here -- confirm.
df = ((_data['FRED_FYGFD'].resample('A', how='mean')) / _data['FRED_USAPOPL'].resample('A', how='mean'))
poli_plot(df, title="Federal debt per capita (thousands of dollars)")

In [ ]:

# CPI divided by its own maximum, averaged per year: 1.0 would mean "at the
# peak CPI level", smaller values mean a lower price level.
inflation_factor_linear = (_data['FRED_CPIAUCSL'] / _data['FRED_CPIAUCSL'].max()).resample('A', how='mean')
# Inverse of the above: the multiplier that scales a year's amounts up to the
# peak-CPI price level.
inflation_factor_uhh = 1 / inflation_factor_linear
# NOTE: despite its name, `cpi` is this multiplier, not the raw index.
cpi = inflation_factor_uhh
#display( cpi.head()) 
#display( cpi.tail())


# Annual-mean gross federal debt divided by annual-mean population.
df = ((_data['FRED_FYGFD'].resample('A', how='mean')) / _data['FRED_USAPOPL'].resample('A', how='mean'))
#print( df.columns )

# Debt per capita multiplied by the inflation multiplier.
scaled = (df * cpi)
#display(scaled.tail())
display(poli_plot(_data['FRED_CPIAUCSL'], title="inflation (FRED CPI UCSL)"))
# pct_change() of the annual-mean CPI level, i.e. the year-over-year change of
# the index itself; axhline() with no arguments draws the y=0 reference line.
plot = poli_plot(_data['FRED_CPIAUCSL'].resample('A','mean').pct_change(), title='yearly % change in inflation')
plot.axhline()

display(plot)


# Nominal vs. inflation-scaled debt per capita, then the yearly change of the
# scaled series with a y=0 reference line.
display(poli_plot(df, title="debt-per-capita"))
display(poli_plot(scaled, title="debt-per-capita scaled for inflation"))
plot = poli_plot(scaled.pct_change(), title="Yearly % change in debt-per-capita scaled for inflation")
plot.axhline()
display(plot)


#poli_plot(df)
#poli_plot(df * inflation_factor_linear)

In [ ]:

# TODO: add house/senate majority party
# TODO: add major wars

In [ ]:

!wget --continue --no-clobber https://github.com/unitedstates/congress-legislators/raw/master/legislators-historical.yaml -O ./data/legislators-historical.yaml
!wget --continue --no-clobber https://github.com/unitedstates/congress-legislators/raw/master/legislators-current.yaml -O ./data/legislators-current.yaml
#import yaml
#data = None
#with open('./data/legislators-historical.yaml','rb') as f:
#    data = yaml.load(f)
!ls ./data

In [ ]:

import yaml
def iter_members(
    data_files=['./data/legislators-historical.yaml',
                './data/legislators-current.yaml']):
    """Yield one flat tuple per term served, for every legislator in the files.

    :param data_files: paths of YAML files, each holding a list of legislator
        mappings with 'name', 'terms' and (optionally) 'bio' entries.
    :returns: generator of 9-tuples whose fields are named, in order, by
        ``iter_members.columns``.

    'party' and 'birthday' are None when absent.
    NOTE(review): a missing gender is reported as 'M' -- confirm that this
    default is intended before using the column for analysis.
    """
    for data_file in data_files:
        with open(data_file, 'rb') as f:
            # safe_load builds plain Python data only; a bare yaml.load() can
            # construct arbitrary objects and is rejected by newer PyYAML
            # releases when no Loader is given.
            members = yaml.safe_load(f)
        for member in members:
            bio = member.get('bio', {})
            for term in member['terms']:
                yield (
                    term['state'],
                    term['type'],
                    term['start'],
                    term['end'],
                    term.get('party'),
                    member['name']['first'],
                    member['name']['last'],
                    bio.get('gender', 'M'),
                    bio.get('birthday'),
                )

# Field names for the tuples yielded by iter_members, in the same order.
iter_members.columns = [
    'state',
    'type',
    'start',
    'end',
    'party',
    'first',
    'last',
    'gender',
    'birthday'
]

_legislator_data = list(iter_members())

In [ ]:

# One row per term served, columns named by iter_members.columns.
df = pd.DataFrame.from_records(_legislator_data, columns=iter_members.columns)

# Parse the three date-like columns (start, end, birthday) into datetimes.
df['start'], df['end'], df['birthday'] = (
    pd.to_datetime(df[_name]) for _name in ('start', 'end', 'birthday'))

# Index by term start while keeping 'start' available as a regular column.
df = df.set_index('start', drop=False)

display(df.head())
display(df)

In [ ]:

col = df['party']
uniques = dict.fromkeys(col.unique())

# Sort every distinct raw party label into one of three buckets and remember
# the bucket name per label in _party_map. None (no party recorded) counts as
# 'Other'.
# NOTE(review): the 'Republ' test runs first, so a label containing both
# substrings (e.g. 'Democratic Republican', if present) is mapped to
# 'Republican' -- confirm that is intended.
_party_map = {}
repub, democ, other = [], [], []
for x in uniques:
    if x is None:
        other.append(x)
        _party_map[x] = 'Other'
    elif 'Republ' in x:
        repub.append(x)
        _party_map[x] = 'Republican'
    elif 'Democr' in x:
        democ.append(x)
        _party_map[x] = 'Democrat'
    else:
        other.append(x)
        _party_map[x] = 'Other'

# Size and members of each bucket, for eyeballing the classification.
print(len(repub), repub)
print(len(democ), democ)
print(len(other), other)

In [ ]:

# Raw party label counts for terms starting in 1949 or later (df is indexed
# by term start), then every row whose party label is exactly 'Liberal'.
display(df['1949':]['party'].value_counts())
display(df[df['party']=='Liberal'])

In [ ]:

# print(df2[df2['state'] == 'NE'][['type', 'party','first','last']].sort().to_string())

In [ ]:

# For rows whose state is 'NE': the share of each party label
# (normalize=True), then the number of rows per term type.
print('# party')
print(df[df['state'] == 'NE']['party'].value_counts(normalize=True))
print('# type')
print(df[df['state'] == 'NE']['type'].value_counts())

In [ ]:

# Collapse each raw party label to its bucket name via _party_map; labels
# missing from the map come back as None from dict.get.
df['two_party_fail'] = df['party'].apply(_party_map.get)

# Absolute counts first, then the same breakdown as fractions of all rows.
display(df['two_party_fail'].value_counts())
display(df['two_party_fail'].value_counts(normalize=True))

In [ ]:

# objective: draw chart with per-year, per-two-party-counts
# group by year
# count factors

def start_year(x):
    """Return the year of ``x`` rounded down to the nearest even year.

    :param x: any object with an integer ``year`` attribute.
    """
    return (x.year // 2) * 2

# Group rows by (start_year applied to each index label, party bucket); the
# index is the term start date, so the first key is the even-rounded start year.
grouper = df.groupby([start_year, 'two_party_fail'])

# Row count per group, with the party buckets unstacked into columns.
whoa = grouper.aggregate({'two_party_fail':len}).unstack()

display(whoa.plot())
display(whoa.head())
display(whoa.tail())

#df.pivot_table(values='two_party_fail', cols=['start'], aggfunc=len)

In [ ]:

# Same breakdown as the party chart, grouped by the 'gender' column instead.
# NOTE: this rebinds `grouper` and `whoa` from the previous cell.
grouper = df.groupby([start_year, 'gender'])

# Row count per (even-rounded start year, gender), genders as columns.
whoa = grouper.aggregate({'gender':len}).unstack()
display(whoa.plot())

In [2]:

# Q. how are these misleading / maybe not as helpful as they could be?
# 1. they count by start year, so they don't show the state at any given time
#    to show the state at any given time would require
#    a 'currently_serving' function
#    which, one might think could take into account standard terms/elections
#    as appropriate for rep/sen,
#    but there are special cases in mid-stream

# 2. they do not stratify by rep/sen; the counts are lumped together
#    'share_y' split by 'type' might be helpful

# ... how many hours would it take to draw these in [spreadsheet tool]
#     only to realize that you have no idea what 
#     'settings' were used to create a (very beautiful) chart?
#     ... python tools for visual studio now support 
#         something like `ipython --pylab=inline/qt`
#         ... i work on various platforms, so that's not an option for me
#         ... not sure what sort of configuration is required to get
#             anaconda ce working with this ide
#     ... ipython qt, ipython notebook
#     ... spyder ide
#     ... you can run these as scheduled jobs which generate online charts,
#         but then, still, without the source,
#         what smoke are you
# ... "you can get a good look at a t-bone steak by"