"""Exploratory loan statistics for the ``donnees_ouverte.csv`` export.

The CSV is read without a header row; column names come from ``heads``.
The script reports loan totals per item type and per title, and the most
borrowed titles per borough, for several type/language subsets.
"""
import numpy as np
import pandas as pd

# Column names applied to the header-less CSV, in file order.
heads = ["borough", "location", "creation_date", "total_loans", "year_loans",
         "status", "type", "cote", "title", "variant", "author",
         "collaboration", "publisher", "place", "country", "year",
         "num_pages", "language", "collection", "ISBN", "img_url"]

CSV_PATH = 'donnees_ouverte.csv'
FICTION = 'LV_Fiction A'
NON_FICTION = 'LV_Documentaire A'


def load_loans(path=CSV_PATH):
    """Read the header-less CSV at *path* into a DataFrame labelled by ``heads``."""
    return pd.read_csv(path, encoding='utf-8', header=None, names=heads)


def select_items(frame, item_type, language=None):
    """Return the rows of *frame* whose ``type`` equals *item_type*.

    When *language* is given, rows must also match it in the ``language``
    column.
    """
    mask = frame['type'] == item_type
    if language is not None:
        mask = mask & (frame['language'] == language)
    return frame[mask]


def loans_by(frame, keys):
    """Sum the numeric columns of *frame* per *keys* group.

    The result is ordered by ``total_loans``, largest first.
    """
    # numeric_only=True keeps text columns (author, publisher, ...) out of
    # the sum; without it current pandas concatenates or rejects strings.
    totals = frame.groupby(keys).sum(numeric_only=True)
    return totals.sort_values('total_loans', ascending=False)


def top_titles_by_borough(frame, n=3):
    """Return the *n* titles with the highest summed ``total_loans`` per borough.

    The result is indexed by ``borough``; boroughs appear in ascending order
    and, within a borough, titles are ordered by descending ``total_loans``.
    """
    totals = (frame.groupby(['borough', 'title'])
                   .sum(numeric_only=True)
                   .reset_index())
    ranked = totals.sort_values(['borough', 'total_loans'],
                                ascending=[True, False])
    # head(n) per group keeps the sorted row order and also behaves correctly
    # for boroughs that have a single row.
    return ranked.groupby('borough').head(n).set_index('borough')


def _show(label, result):
    """Print *result* under a short *label* heading."""
    print('\n== {} =='.format(label))
    print(result)


def main():
    """Load the CSV and print every summary table."""
    data = load_loans()
    _show('First rows', data.head(4))
    _show('Loans by type', loans_by(data, ['type']))
    _show('Distinct titles', len(data['title'].unique()))
    _show('Rows per borough', data['borough'].value_counts())
    _show('Top 10 titles', loans_by(data, ['title']).head(10))

    eng_fiction = select_items(data, FICTION, 'eng')
    if not eng_fiction.empty:
        # idxmax raises on an empty selection, hence the guard.
        _show('Most borrowed English fiction row',
              eng_fiction.loc[eng_fiction['total_loans'].idxmax()])
    _show('English fiction by title/variant',
          loans_by(eng_fiction, ['title', 'variant']))

    fre_fiction = select_items(data, FICTION, 'fre')
    _show('French fiction by title/variant',
          loans_by(fre_fiction, ['title', 'variant']))

    fre_non_fiction = select_items(data, NON_FICTION, 'fre')
    _show('French non-fiction by title', loans_by(fre_non_fiction, ['title']))

    eng_non_fiction = select_items(data, NON_FICTION, 'eng')
    _show('English non-fiction by title', loans_by(eng_non_fiction, ['title']))

    _show('Top titles per borough', top_titles_by_borough(data))
    _show('Top English fiction per borough',
          top_titles_by_borough(eng_fiction))
    _show('Top French fiction per borough',
          top_titles_by_borough(fre_fiction))

    non_fic = select_items(data, NON_FICTION)
    _show('Top non-fiction per borough', top_titles_by_borough(non_fic))


if __name__ == '__main__':
    main()