"""Extract the '07' slice of the 2013 taxi table and sanity-check its groups.

Originally a sequence of notebook cells; written here as a plain script that
runs top to bottom under a regular Python interpreter (2.7 or 3.x).

Steps:
  1. Load ``taxi_2013.csv``, keep rows whose ``tuid`` has '07' at string
     positions 4-5, and write them to ``taxi_2013_m07.csv``.
  2. Reload that subset, add a per-row ``count`` of 1, sum by
     (``tuid``, ``type``) and write ``taxi_2013_m07_groupby.csv``.
  3. Reload the grouped file and show groups with ``count < 1``, then show
     the rows of one specific ``tuid`` from the subset.
"""
import pandas as pd

# ---------------------------------------------------------------------------
# Step 1: filter the full-year table down to the '07' rows.
# ---------------------------------------------------------------------------

# Data root for this step. Another root previously used here:
#   '/Users/danielmsheehan/Desktop/data/'
pj = '/Users/danielmsheehan/Dropbox/data/'

# tuid/geoid are forced to object (string) dtype; the .str slice below
# requires tuid to be a string column.
df = pd.read_csv(pj+'output/all/taxi_2013.csv', dtype={'tuid':object,'geoid':object})
print(df.dtypes)

# Keep rows whose tuid characters [4:6] are '07'.
# presumably these are the month digits of a YYYYMMDD-prefixed id - TODO confirm
df = df[(df.tuid.str[4:6] == '07')]
df.to_csv(pj+'output/all/taxi_2013_m07.csv', index=False)
print(df)

# The notebook used the IPython magic `%reset` here, which is not valid
# Python. Dropping the frame explicitly releases it before the next load.
del df

# ---------------------------------------------------------------------------
# Step 2: count rows per (tuid, type) in the '07' subset.
# ---------------------------------------------------------------------------

# Raise the display limits so wide/long frames are printed in full.
pd.options.display.max_columns = 5200
pd.options.display.max_rows = 5200

# Data root for the remaining steps. Other roots previously used here:
#   '/Users/danielmsheehan/Desktop/data/'
#   '/Users/danielmsheehan/Dropbox/data/'
# NOTE(review): step 1 writes under /Users/danielmsheehan/Dropbox/data/ while
# this step reads from /Volumes/Hotel/Dropbox/data/ - confirm both point at
# the same synced folder.
pj = '/Volumes/Hotel/Dropbox/data/'

df = pd.read_csv(pj+'output/all/taxi_2013_m07.csv', dtype={'tuid':object,'geoid':object})

# Every row contributes 1, so the summed 'count' is the group's row count.
df['count'] = 1
dfg = df.groupby(['tuid','type']).sum()
# index=True keeps the (tuid, type) group keys as columns in the CSV.
dfg.to_csv(pj+'output/all/taxi_2013_m07_groupby.csv', index=True)

# ---------------------------------------------------------------------------
# Step 3: inspect the grouped output.
# ---------------------------------------------------------------------------

# Read back without a dtype override, so column types are inferred.
df = pd.read_csv(pj+'output/all/taxi_2013_m07_groupby.csv')
print(df.dtypes)

# Groups with count < 1. Each group holds at least one row with count == 1,
# so this selection is expected to be empty.
# NOTE(review): possibly `> 1` was intended (to find repeated (tuid, type)
# pairs) - confirm before relying on this check.
df1 = df[(df['count'] < 1)]
print(df1)

# Look up the raw subset rows for one specific tuid.
df2 = pd.read_csv(pj+'output/all/taxi_2013_m07.csv', dtype={'tuid':object,'geoid':object})
df3 = df2[(df2.tuid == '20130723462939')]
print(df3)