"""Plot per-country search-engine share data and compare HHI across countries.

For each country code in ``countries`` a monthly CSV is downloaded and plotted,
the same engine is then overlaid across countries, and finally a
Herfindahl-Hirschman Index (sum of squared shares) is computed per month.

Run as a script to download the data and show the figures; importing the
module only defines the helpers and performs no network or plotting work.
"""
import pandas as pd

countries = ['US', 'CA', 'GB', 'FR', 'CN', 'RU', 'DE']

# Figure specs reused by most countries: ([(column, colour), ...], title).
_GOOGLE = ([('Google', 'g')], 'Google')
_BING_YAHOO = ([('bing', 'b'), ('Yahoo!', 'y')],
               'Bing (Blue) and Yahoo! (Yellow)')

# (country code, title of the all-columns overview, detail figures), in the
# order the figures are shown.
_COUNTRY_FIGURES = [
    ('US', 'All companies in the US', [
        _GOOGLE,
        _BING_YAHOO,
        ([('AOL', 'k'), ('Ask Jeeves', 'r')],
         'AOL (Black) and Ask Jeeves (Red)'),
    ]),
    ('CN', 'All companies in China', [
        ([('Google', 'g'), ('Baidu', 'b'), ('360 Search', 'r')],
         'Google (Green), Baidu (Blue) and 360 Search (Red)'),
        _BING_YAHOO,
    ]),
    ('GB', 'All companies in the UK', [
        _GOOGLE,
        _BING_YAHOO,
        ([('Ask Jeeves', 'k'), ('AOL', 'r')],
         'Ask Jeeves (Black) and AOL (Red)'),
    ]),
    ('DE', 'All companies in Germany', [
        _GOOGLE,
        _BING_YAHOO,
        ([('WEB.DE', 'k'), ('Ask Jeeves', 'r')],
         'web.de (Black) and Ask Jeeves (Red)'),
    ]),
    ('CA', 'All companies in Canada', [
        _GOOGLE,
        _BING_YAHOO,
    ]),
    ('RU', 'All companies in Russia', [
        ([('YANDEX RU', 'r')], 'Yandex'),
        _GOOGLE,
        _BING_YAHOO,
    ]),
    ('FR', 'All companies in France', [
        _GOOGLE,
        _BING_YAHOO,
        # Title previously read 'Viola', which mislabelled the 'Voila' series.
        ([('Voila', 'k'), ('Ask Jeeves', 'r')],
         'Voila (Black) and Ask Jeeves (Red)'),
    ]),
]

# (engine column, [(country code, colour), ...]) for cross-country overlays.
_CROSS_COUNTRY_FIGURES = [
    ('Google', [('US', 'b'), ('GB', 'g'), ('FR', 'y'), ('DE', 'r')]),
    ('bing', [('CN', 'r'), ('US', 'b'), ('GB', 'g'), ('FR', 'y'),
              ('DE', 'k')]),
    ('Yahoo!', [('CN', 'r'), ('US', 'b'), ('GB', 'g'), ('FR', 'y'),
                ('DE', 'k')]),
]


def makeDataFrame(country):
    """Download the monthly CSV for ``country`` and return it as a DataFrame.

    The first CSV column becomes the index and is converted to a
    ``DatetimeIndex``.

    Parameters:
        country: country code substituted into the file name (e.g. ``'US'``).

    Returns:
        DataFrame indexed by date, one column per entry in the CSV header.
    """
    baseurl = 'http://s3.amazonaws.com/econpy/hhi'
    url = '%s/search_engine-%s-monthly-200807-201304.csv' % (baseurl, country)
    dframe = pd.read_csv(url, index_col=0)
    dframe.index = pd.DatetimeIndex(dframe.index)
    return dframe


def get_hhi(dframe, drop_other=True):
    """Return the per-row HHI (sum of squared positive values) of ``dframe``.

    The input frame is never modified. Earlier versions popped the 'Other'
    column from the caller's frame, so a second call on the same frame raised
    ``KeyError`` and the caller silently lost data.

    Parameters:
        dframe: DataFrame with one row per period and one numeric column per
            group; the index must be convertible to a ``DatetimeIndex``.
        drop_other: if true, exclude the 'Other' column from the calculation.
            Dropping the 'Other' group is a good idea when calculating the
            HHI. A frame without an 'Other' column is accepted as-is.

    Returns:
        Series named 'hhi' with a ``DatetimeIndex`` named 'month'. Values that
        are missing or not strictly positive contribute nothing to the sum.
    """
    if drop_other:
        # drop() returns a new frame, leaving the caller's data intact.
        dframe = dframe.drop('Other', axis=1, errors='ignore')

    # Mask non-positive values to NaN so sum() skips them (and any NaN input).
    positive = dframe.where(dframe > 0)
    hhi = (positive ** 2).sum(axis=1)
    hhi.name = 'hhi'
    hhi.index = pd.DatetimeIndex(dframe.index, name='month')
    return hhi


def _plot_overlay(series_colors, title=None):
    """Draw each (series, colour) pair; the title goes on the last plot call."""
    for series, color in series_colors[:-1]:
        series.plot(color=color)
    series, color = series_colors[-1]
    series.plot(color=color, title=title)


def main():
    """Download every country's data, show all figures and print statistics."""
    # Imported here so that importing this module does not require matplotlib.
    # The script previously called pylab.show() without importing pylab.
    from matplotlib import pyplot as plt

    # Fetch each country once and reuse it everywhere. A plain dict replaces
    # pd.Panel (removed from pandas) and supports the same
    # panel[country][engine] lookups.
    panel = {country: makeDataFrame(country) for country in countries}

    # Per-country figures: an overview of every column, then detail overlays.
    for country, overview_title, figures in _COUNTRY_FIGURES:
        frame = panel[country]
        frame.plot(title=overview_title)
        plt.show()
        for columns, title in figures:
            _plot_overlay([(frame[column], color)
                           for column, color in columns], title)
            plt.show()

    # The same engine overlaid across countries, one figure per engine.
    for engine, country_colors in _CROSS_COUNTRY_FIGURES:
        _plot_overlay([(panel[country][engine], color)
                       for country, color in country_colors])
        plt.show()

    # US HHI without (blue) and with (red) the 'Other' group.
    us_df = panel['US']
    get_hhi(us_df, drop_other=True).plot(color='b')
    get_hhi(us_df, drop_other=False).plot(color='r')
    plt.show()

    # Mean and variance of each engine's series per country.
    for searchengine in ['Google', 'bing', 'Yahoo!']:
        print('%s mean var \n%s' % (searchengine, '=' * 18))
        for country in ['US', 'GB', 'CA', 'DE', 'FR', 'CN', 'RU']:
            tmpdf = panel[country][searchengine]
            print('%s: %.3f %.3f' % (country, tmpdf.mean(), tmpdf.var()))
        print('\n')

    # HHI of every country side by side.
    hhi_df = pd.DataFrame({country: get_hhi(panel[country])
                           for country in countries})
    hhi_df.plot()
    plt.show()


if __name__ == '__main__':
    main()