import pandas as pd

# Country codes that are spliced into the data-file names and used as keys
# when the per-country frames are collected.
countries = [
    'US',
    'CA',
    'GB',
    'FR',
    'CN',
    'RU',
    'DE',
]

def makeDataFrame(country):
    """Load the monthly search-engine CSV for ``country`` into a DataFrame.

    ``country`` is inserted into the file name fetched from the
    ``econpy/hhi`` location on s3.amazonaws.com. The first CSV column is
    used as the index and converted to a ``DatetimeIndex``.

    The file name carries ``200807-201304``, presumably the covered period
    (2008-07 to 2013-04); this is not checked here.
    """
    filename = 'search_engine-{0}-monthly-200807-201304.csv'.format(country)
    location = '/'.join(['http://s3.amazonaws.com/econpy/hhi', filename])

    shares = pd.read_csv(location, index_col=0)
    shares.index = pd.DatetimeIndex(shares.index)
    return shares

# United States: one figure with every column, then close-ups.
# NOTE(review): `pylab` is not imported in the visible part of this file --
# presumably it is supplied by the runtime (e.g. IPython's pylab mode); confirm.
us_df = makeDataFrame('US')
us_df.plot(title='All companies in the US')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
us_figures = [
    [('Google', {'color': 'g', 'title': 'Google'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
    [('AOL', {'color': 'k'}),
     ('Ask Jeeves', {'color': 'r', 'title': 'AOL (Black) and Ask Jeeves (Red)'})],
]
for figure_lines in us_figures:
    for column, plot_kwargs in figure_lines:
        us_df[column].plot(**plot_kwargs)
    pylab.show()

# China: one figure with every column, then two grouped comparisons.
china_df = makeDataFrame('CN')
china_df.plot(title='All companies in China')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
china_figures = [
    [('Google', {'color': 'g'}),
     ('Baidu', {'color': 'b'}),
     ('360 Search', {'color': 'r', 'title': 'Google (Green), Baidu (Blue) and 360 Search (Red)'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
]
for figure_lines in china_figures:
    for column, plot_kwargs in figure_lines:
        china_df[column].plot(**plot_kwargs)
    pylab.show()

# United Kingdom: one figure with every column, then close-ups.
uk_df = makeDataFrame('GB')
uk_df.plot(title='All companies in the UK')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
uk_figures = [
    [('Google', {'color': 'g', 'title': 'Google'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
    [('Ask Jeeves', {'color': 'k'}),
     ('AOL', {'color': 'r', 'title': 'Ask Jeeves (Black) and AOL (Red)'})],
]
for figure_lines in uk_figures:
    for column, plot_kwargs in figure_lines:
        uk_df[column].plot(**plot_kwargs)
    pylab.show()

# Germany: one figure with every column, then close-ups.
germany_df = makeDataFrame('DE')
germany_df.plot(title='All companies in Germany')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
germany_figures = [
    [('Google', {'color': 'g', 'title': 'Google'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
    [('WEB.DE', {'color': 'k'}),
     ('Ask Jeeves', {'color': 'r', 'title': 'web.de (Black) and Ask Jeeves (Red)'})],
]
for figure_lines in germany_figures:
    for column, plot_kwargs in figure_lines:
        germany_df[column].plot(**plot_kwargs)
    pylab.show()

# Canada: one figure with every column, then close-ups.
canada_df = makeDataFrame('CA')
canada_df.plot(title='All companies in Canada')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
canada_figures = [
    [('Google', {'color': 'g', 'title': 'Google'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
]
for figure_lines in canada_figures:
    for column, plot_kwargs in figure_lines:
        canada_df[column].plot(**plot_kwargs)
    pylab.show()

# Russia: one figure with every column, then close-ups.
russia_df = makeDataFrame('RU')
russia_df.plot(title='All companies in Russia')
pylab.show()

# One inner list per figure; each entry is (column, Series.plot keywords).
# The title is passed with the last series drawn on a figure.
russia_figures = [
    [('YANDEX RU', {'color': 'r', 'title': 'Yandex'})],
    [('Google', {'color': 'g', 'title': 'Google'})],
    [('bing', {'color': 'b'}),
     ('Yahoo!', {'color': 'y', 'title': 'Bing (Blue) and Yahoo! (Yellow)'})],
]
for figure_lines in russia_figures:
    for column, plot_kwargs in figure_lines:
        russia_df[column].plot(**plot_kwargs)
    pylab.show()

# France: every column on one figure, then Google alone, bing vs. Yahoo!,
# and Voila vs. Ask Jeeves.
france_df = makeDataFrame('FR')
france_df.plot(title='All companies in France')
pylab.show()

france_df['Google'].plot(color='g', title='Google')
pylab.show()

# Both series share a figure; the title is passed with the second one.
france_df['bing'].plot(color='b')
france_df['Yahoo!'].plot(color='y', title='Bing (Blue) and Yahoo! (Yellow)')
pylab.show()

# The plotted column is 'Voila'; the title now spells it the same way
# (it previously read 'Viola').
france_df['Voila'].plot(color='k')
france_df['Ask Jeeves'].plot(color='r', title='Voila (Black) and Ask Jeeves (Red)')
pylab.show()

# Frames for every country, keyed by country code. ``pd.Panel`` has been
# removed from pandas, so a plain dict is used instead; it supports the same
# ``panel[country][engine]`` lookups.
panel = {country: makeDataFrame(country) for country in countries}

# Google in the US (blue), UK (green), France (yellow) and Germany (red).
panel['US']['Google'].plot(color='b')
panel['GB']['Google'].plot(color='g')
panel['FR']['Google'].plot(color='y')
panel['DE']['Google'].plot(color='r')
# Show each comparison as its own figure, like the per-country plots do;
# without this the three groups are drawn over each other.
pylab.show()

# bing in China (red), the US (blue), UK (green), France (yellow) and
# Germany (black).
panel['CN']['bing'].plot(color='r')
panel['US']['bing'].plot(color='b')
panel['GB']['bing'].plot(color='g')
panel['FR']['bing'].plot(color='y')
panel['DE']['bing'].plot(color='k')
pylab.show()

# Yahoo! for the same five countries and colours.
panel['CN']['Yahoo!'].plot(color='r')
panel['US']['Yahoo!'].plot(color='b')
panel['GB']['Yahoo!'].plot(color='g')
panel['FR']['Yahoo!'].plot(color='y')
panel['DE']['Yahoo!'].plot(color='k')
pylab.show()

def get_hhi(dframe, drop_other=True):
    """Return the Herfindahl-Hirschman Index (HHI) for every row of ``dframe``.

    Each row is treated as one period of market shares, one column per
    company. The HHI of a row is the sum of the squares of its strictly
    positive shares; missing (NaN), zero and negative entries contribute
    nothing.

    Parameters
    ----------
    dframe : pandas.DataFrame
        Shares, indexed by values that ``pd.DatetimeIndex`` accepts.
        The frame passed in is never modified.
    drop_other : bool, default True
        If true, leave the 'Other' column out of the calculation.
        Dropping the 'Other' group is a good idea when calculating the HHI.

    Returns
    -------
    pandas.Series
        Series named 'hhi' with a ``DatetimeIndex`` named 'month'. An empty
        frame yields an empty series.

    Raises
    ------
    KeyError
        If ``drop_other`` is true and ``dframe`` has no 'Other' column.
    """
    if drop_other:
        # drop() returns a new frame, so the caller's data keeps 'Other'.
        dframe = dframe.drop('Other', axis=1)

    # Replace everything that is not a strictly positive share with 0
    # (NaN compares false against 0), then square and add up each row.
    positive = dframe.where(dframe > 0, 0)
    hhi = (positive ** 2).sum(axis=1)

    hhi.index = pd.DatetimeIndex(dframe.index, name='month')
    hhi.name = 'hhi'
    return hhi

# US HHI from a freshly downloaded frame: once with the 'Other' column kept
# (us_hhi_t, red) and once with it dropped (us_hhi_f, blue). The variant
# that keeps 'Other' is computed first.
us_df = makeDataFrame('US')
us_hhi_t = get_hhi(us_df, drop_other=False)
us_hhi_f = get_hhi(us_df, drop_other=True)
us_hhi_f.plot(color='b')
us_hhi_t.plot(color='r')
# Display the comparison, as is done after every per-country figure.
pylab.show()

# Print the mean and variance of each engine's series for every country.
# print() is called with one pre-formatted string so the output is the same
# whether the file runs under Python 2 or Python 3.
for searchengine in ['Google', 'bing', 'Yahoo!']:
    print('%s  mean  var  \n%s' % (searchengine, '='*18))
    for country in ['US', 'GB', 'CA', 'DE', 'FR', 'CN', 'RU']:
        tmpdf = panel[country][searchengine]
        print('%s:  %.3f  %.3f' % (country, tmpdf.mean(), tmpdf.var()))
    # Blank separator between engines.
    print('\n')

# One HHI column per country (with get_hhi's default of dropping 'Other'),
# each computed from a freshly downloaded frame, plotted together.
hhi_df = pd.DataFrame({country: get_hhi(makeDataFrame(country)) for country in countries})
hhi_df.plot()
# Display the figure, as is done after every per-country figure.
pylab.show()