"""Per-geoid summary statistics of ``dist_roadbed`` from ``taxi_2013.csv``.

The script runs in two steps, both working inside ``<data root>/output/all``:

1. ``write_describe_csv`` groups ``taxi_2013.csv`` by ``geoid``, describes
   ``dist_roadbed`` and stores the result as a headerless three-column file
   (``geoid, stat, value``) named ``taxi_2013_describe.csv``.
2. ``write_stat_csvs`` reads that file back and writes one CSV each for the
   mean, median, standard deviation and count, plus a merged
   ``taxi_2013_describe_stats.csv``.

Notebook-only lines from the original (``df.head(50)`` and the ``%reset``
magic, which is IPython syntax rather than Python) are not reproduced; the
intermediate frames are function locals here.
"""

import os

import numpy as np
import pandas as pd

pd.options.display.max_columns = 5200
pd.options.display.max_rows = 5200

# Candidate data roots. Each assignment overrides the previous one, so the
# last line is the root actually used.
pj = '/Users/danielmsheehan/Desktop/data/'
pj = '/Users/danielmsheehan/Dropbox/data/'
pj = '/Volumes/Hotel/Dropbox/data/'

# (output file suffix, label produced by describe(), output column name)
_STAT_COLUMNS = (
    ('avg', 'mean', 'avgbrdist'),
    ('med', '50%', 'medbrdist'),
    ('std', 'std', 'stdbrdist'),
    ('cnt', 'count', 'count'),
)


def _out_dir(data_dir):
    """Return the ``output/all`` directory under *data_dir* (default: ``pj``)."""
    root = pj if data_dir is None else data_dir
    return os.path.join(root, 'output/all')


def _describe_long(desc):
    """Return *desc* as a long frame with columns ``geoid, stat, value``.

    ``SeriesGroupBy.describe()`` returns a wide DataFrame (one column per
    statistic) in current pandas. A Series result is also accepted, on the
    assumption that it is indexed by (geoid, stat) as older pandas produced.
    """
    if isinstance(desc, pd.Series):
        long = desc.reset_index()
        long.columns = ['geoid', 'stat', 'value']
        return long
    long = desc.reset_index().melt(
        id_vars='geoid', var_name='stat', value_name='value')
    # melt() emits the rows statistic by statistic; a stable sort on geoid
    # regroups them per geoid while keeping describe()'s statistic order.
    return long.sort_values('geoid', kind='mergesort')


def _pick_stat(stats, stat, column):
    """Return the ``geoid``/value rows of one statistic, value named *column*."""
    rows = stats.loc[stats['stat'] == stat, ['geoid', 'value']]
    # rename() returns a new frame, so later edits never touch a slice of
    # `stats`.
    return rows.rename(columns={'value': column})


def write_describe_csv(data_dir=None):
    """Describe ``dist_roadbed`` per ``geoid`` and save it in long format.

    Parameters
    ----------
    data_dir : str, optional
        Data root containing ``output/all/taxi_2013.csv``. Defaults to ``pj``.

    Returns
    -------
    pandas.DataFrame
        The long frame (``geoid, stat, value``) that was written, without a
        header row, to ``taxi_2013_describe.csv``.
    """
    out_dir = _out_dir(data_dir)
    trips = pd.read_csv(os.path.join(out_dir, 'taxi_2013.csv'),
                        dtype={'geoid': object})
    long = _describe_long(trips.groupby('geoid')['dist_roadbed'].describe())
    long.to_csv(os.path.join(out_dir, 'taxi_2013_describe.csv'),
                header=False, index=False)
    return long


def write_stat_csvs(data_dir=None):
    """Split ``taxi_2013_describe.csv`` into per-statistic CSV files.

    Parameters
    ----------
    data_dir : str, optional
        Data root containing ``output/all/taxi_2013_describe.csv``. Defaults
        to ``pj``.

    Returns
    -------
    dict
        Frames keyed by output file suffix: ``'avg'``, ``'med'``, ``'std'``,
        ``'cnt'`` and the merged ``'stats'``. Each is written to
        ``taxi_2013_describe_<suffix>.csv``.
    """
    out_dir = _out_dir(data_dir)
    # The file has no header row. Passing `names` at parse time is what lets
    # the 'geoid' dtype apply; with header=None alone the columns are
    # labelled 0, 1, 2 and the dtype key matches nothing.
    stats = pd.read_csv(os.path.join(out_dir, 'taxi_2013_describe.csv'),
                        dtype={'geoid': object}, header=None,
                        names=['geoid', 'stat', 'value'])

    frames = {}
    for suffix, stat, column in _STAT_COLUMNS:
        frames[suffix] = _pick_stat(stats, stat, column)
    frames['cnt'] = frames['cnt'].astype({'count': int})
    # Missing or infinite standard deviations are stored as 0.
    frames['std'] = frames['std'].fillna(0).replace(np.inf, 0)

    merged = frames['avg']
    for suffix in ('med', 'std', 'cnt'):
        merged = merged.merge(frames[suffix], on='geoid', how='left')
    frames['stats'] = merged

    for suffix, frame in frames.items():
        frame.to_csv(
            os.path.join(out_dir, 'taxi_2013_describe_' + suffix + '.csv'),
            index=False)
    return frames


if __name__ == '__main__':
    write_describe_csv()
    counts = write_stat_csvs()['cnt']
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    print(counts.sort_values('count', ascending=False))
    # Sum only the numeric column; summing the whole frame would also try to
    # add up the geoid strings.
    print(counts['count'].sum())