import pandas as pd
import numpy as np
# Root data directory. Earlier revisions also assigned
# '/Users/danielmsheehan/Desktop/data/' and '/Users/danielmsheehan/Dropbox/data/'
# to this name first, but each was immediately overwritten, so only the
# effective value is kept. Edit this line to run on another machine.
pj = '/Volumes/Hotel/Dropbox/data/'

# The input file has no header row; each line is a (geoid, stat, value) triple.
# The column names are handed to read_csv directly so the dtype mapping can
# actually match 'geoid'. If the names are only assigned after parsing, the
# parser sees integer labels 0..2, the 'geoid' dtype entry matches nothing,
# and geoid is type-inferred instead of being kept as a string.
df = pd.read_csv(
    pj + 'output/all/taxi_2013_describe.csv',
    header=None,
    names=['geoid', 'stat', 'value'],
    dtype={'geoid': object},
)
def _select_stat(frame, stat, column):
    """Return the rows of *frame* whose ``stat`` equals *stat* as a new frame.

    The result has exactly two columns, ``geoid`` and *column* (the original
    ``value`` column renamed). ``rename`` hands back a new DataFrame, so
    callers can modify the result without touching *frame* and without
    triggering pandas' SettingWithCopyWarning.
    """
    rows = frame.loc[frame['stat'] == stat, ['geoid', 'value']]
    return rows.rename(columns={'value': column})


# One two-column table per statistic, keyed by geoid.
df_avg = _select_stat(df, 'mean', 'avgbrdist')
df_med = _select_stat(df, '50%', 'medbrdist')   # the '50%' row is the median
df_std = _select_stat(df, 'std', 'stdbrdist')

# Counts are cast to int so they are stored and written as whole numbers.
df_cnt = _select_stat(df, 'count', 'count').astype({'count': int})

# Missing and +inf standard deviations are replaced by 0.
# NOTE(review): presumably the NaNs come from groups with a single
# observation (sample std is undefined there) -- confirm against the
# step that produced the describe file.
df_std = df_std.fillna(0).replace(np.inf, 0)
# Build the combined table: start from the mean table and left-join the
# median, std and count tables onto it, one after another, on geoid.
df_all = df_avg
for _extra in (df_med, df_std, df_cnt):
    df_all = df_all.merge(_extra, on='geoid', how='left')

# Write the combined table first, then each per-statistic table, all without
# the index column. Paths are relative to the data root `pj`.
_outputs = [
    (df_all, 'output/all/taxi_2013_describe_stats.csv'),
    (df_avg, 'output/all/taxi_2013_describe_avg.csv'),
    (df_med, 'output/all/taxi_2013_describe_med.csv'),
    (df_std, 'output/all/taxi_2013_describe_std.csv'),
    (df_cnt, 'output/all/taxi_2013_describe_cnt.csv'),
]
for _frame, _relpath in _outputs:
    _frame.to_csv(pj + _relpath, index=False)