"""Exploratory analysis of police incident data by district and by date.

Loads the incident CSV, cleans the crime-type column, cross-tabulates crime
types per district (raw and consolidated), and plots monthly averages of
daily incident counts.
"""
import pandas as pd

CSV_PATH = 'police_inct/police_inct.csv'

# Subset of columns used for the per-district analysis.
CRIME_COLUMNS = ['DC_DIST', 'DISPATCH_DATE_TIME', 'LOCATION_BLOCK',
                 'UCR_GENERAL', 'OBJECTID', 'TEXT_GENERAL_CODE']

# Consolidated categories, checked in this order: a description that matches
# several (e.g. one containing both "vehicle" and "theft") gets the first hit.
CONSOLIDATED_CODES = ('Assault', 'Burglary', 'Homicide', 'Vehicle',
                      'Robbery', 'Theft')

# One colour per crime type in the stacked bar charts.
DETAILED_COLORS = ['r', 'g', 'b', 'c', 'y', 'w', 'm', 'k', 'burlywood',
                   'navy', 'teal', 'LightSteelBlue', 'Honeydew', 'Goldenrod']
CONSOLIDATED_COLORS = ['r', 'g', 'b', 'c', 'y', 'k', 'm', 'w']

# Legend placement shared by every plot.
LEGEND_KWARGS = {'loc': 0, 'bbox_to_anchor': (1, 1)}


def code_rename(code):
    """Lazy consolidation of crime codes.

    Return the first entry of ``CONSOLIDATED_CODES`` that occurs
    (case-insensitively) as a substring of ``code``. If none matches,
    ``code`` is returned unchanged. Non-string values (e.g. missing data)
    are also returned unchanged instead of raising.
    """
    if not isinstance(code, str):
        return code
    lower_code = code.lower()
    for new_code in CONSOLIDATED_CODES:
        if new_code.lower() in lower_code:
            return new_code
    return code


def _row_fractions(counts):
    """Divide every row of ``counts`` by its row total."""
    return counts.div(counts.sum(axis=1).astype(float), axis=0)


def _plot_monthly_mean(daily_counts):
    """Plot the per-calendar-month mean of ``daily_counts`` on a period axis."""
    # 'MS' bins by calendar month; to_period('M') relabels the bins as
    # monthly periods for the x axis.
    monthly = daily_counts.resample('MS').mean().to_period('M')
    ax = monthly.plot()
    ax.legend(**LEGEND_KWARGS)
    return ax


def main():
    """Run the full analysis, printing tables and building the plots."""
    # NOTE: outside a notebook the figures are created but not displayed;
    # show or save them with matplotlib as needed.
    df = pd.read_csv(CSV_PATH)
    print(df)

    # Columns of our data frame
    print(df.columns)

    # First entry
    print(df.iloc[0])

    # Copy so that later column assignments modify ``crime`` itself rather
    # than a slice of ``df``.
    crime = df[CRIME_COLUMNS].copy()
    print(crime)

    # Count the different types of crime
    print(crime['TEXT_GENERAL_CODE'].value_counts())

    # Homicide - Criminal is listed twice because of a trailing space.
    # Clean leading and trailing whitespace.
    crime['TEXT_GENERAL_CODE'] = crime['TEXT_GENERAL_CODE'].str.strip()
    print(crime['TEXT_GENERAL_CODE'].value_counts())

    # Create a new DataFrame for the 22nd district
    dist_22 = crime[crime['DC_DIST'] == 22]

    # Look at the number of distinct crimes in District 22
    print(dist_22['TEXT_GENERAL_CODE'].value_counts())

    # Create a cross tabulation of crime type across all districts
    crime_counts = pd.crosstab(crime['DC_DIST'], crime['TEXT_GENERAL_CODE'])
    print(crime_counts)

    # Normalize types of crime for each district
    crime_pct = _row_fractions(crime_counts)

    # Sort by the Thefts column. sort_values returns a new DataFrame and
    # does not change crime_pct itself.
    print(crime_pct.sort_values('Thefts'))

    # Plot normalized crime vs district
    ax = crime_pct.plot(kind='bar', stacked=True, color=DETAILED_COLORS)
    ax.legend(**LEGEND_KWARGS)

    # Consolidate crimes
    crime['TEXT_GENERAL_CODE'] = crime['TEXT_GENERAL_CODE'].map(code_rename)
    print(crime['TEXT_GENERAL_CODE'].unique())

    simple_crime_count = pd.crosstab(crime['DC_DIST'],
                                     crime['TEXT_GENERAL_CODE'])

    # Normalize crime types
    crime_pct = _row_fractions(simple_crime_count)
    print(crime_pct)

    ax = crime_pct.plot(kind='bar', stacked=True, color=CONSOLIDATED_COLORS)
    ax.legend(**LEGEND_KWARGS)

    # ``df`` was never whitespace-cleaned (only the ``crime`` copy was), so
    # strip here before consolidating to avoid duplicate categories.
    df['TEXT_GENERAL_CODE'] = (
        df['TEXT_GENERAL_CODE'].str.strip().map(code_rename)
    )
    ct_date = pd.crosstab(df['DISPATCH_DATE'], df['TEXT_GENERAL_CODE'])
    print(ct_date)

    # Set index as a DateTime format
    ct_date.index = pd.to_datetime(ct_date.index)

    # Date filtering
    print(ct_date.loc['2012-01'].sum())
    year_2012 = ct_date.loc['2012']

    # Resample data to monthly periods
    _plot_monthly_mean(year_2012)

    # Plot of all years available
    _plot_monthly_mean(ct_date)


if __name__ == '__main__':
    main()