"""Summarise the `bin` column of a buildings CSV per `geoid`.

Reads the buildings table, writes the per-geoid ``describe()`` summary of
the ``bin`` column to one CSV, and writes the per-geoid count of non-null
``bin`` values (column ``countbldg``) to a second CSV.

NOTE(review): the file names suggest 2010 census blocks and building
identification numbers -- confirm against the data dictionary.
"""

import pandas as pd
import numpy as np

pd.options.display.max_columns = 5200
pd.options.display.max_rows = 5200

# Root data directory.  The original script assigned `pj` three times in a
# row; only the last assignment ever took effect, so it is the one kept.
# Previously tried locations:
#   '/Users/danielmsheehan/Desktop/data/'
#   '/Users/danielmsheehan/Dropbox/data/'
pj = '/Volumes/Hotel/Dropbox/data/'

BUILDINGS_CSV = pj + 'processing/buildings/cb2010_building_0913_int.csv'
DESCRIBE_CSV = pj + 'processing/buildings/cb2010_building_0913_int_describe.csv'
COUNT_CSV = pj + 'processing/buildings/cb2010_building_0913_int_describe_cnt.csv'


def load_buildings(path):
    """Read the buildings CSV at *path* with lower-cased column names.

    ``geoid`` is read as ``object`` so it is kept as text instead of being
    type-inferred.
    """
    raw = pd.read_csv(path, dtype={'geoid': object})
    return raw.rename(columns=lambda x: x.lower())


def describe_bins_by_geoid(df):
    """Return ``describe()`` of the ``bin`` column for each ``geoid`` group."""
    return df.groupby('geoid')['bin'].describe()


def count_buildings_by_geoid(df):
    """Return a frame with columns ``geoid`` and ``countbldg``.

    ``countbldg`` is the number of non-null ``bin`` values in each ``geoid``
    group, i.e. the same figure ``describe()`` reports as ``count``.

    The count is taken straight from the group-by rather than by re-reading
    the describe CSV as three header-less columns (geoid, stat, value), as
    the original script did.  That round trip only works when the describe
    output is serialised in long form, it re-infers the type of ``geoid``
    (the ``dtype={'geoid': ...}`` hint cannot match a column by name when
    ``header=None``), and it assigned a new column onto a filtered slice.
    """
    counts = df.groupby('geoid')['bin'].count()
    return counts.reset_index(name='countbldg')


def main():
    """Run the full pipeline: load, write describe CSV, write count CSV."""
    buildings = load_buildings(BUILDINGS_CSV)

    # Still written for anyone consuming the full summary statistics; its
    # layout is whatever the installed pandas produces for describe().
    describe_bins_by_geoid(buildings).to_csv(DESCRIBE_CSV)

    count_buildings_by_geoid(buildings).to_csv(COUNT_CSV, index=False)


if __name__ == '__main__':
    main()