import pandas as pd
df = pd.read_csv('police_inct/police_inct.csv')
df
<class 'pandas.core.frame.DataFrame'> Int64Index: 610868 entries, 0 to 610867 Data columns: DC_DIST 610868 non-null values SECTOR 610868 non-null values DISPATCH_DATE_TIME 610868 non-null values DISPATCH_DATE 610868 non-null values DISPATCH_TIME 610868 non-null values HOUR 610868 non-null values DC_KEY 610868 non-null values LOCATION_BLOCK 610868 non-null values UCR_GENERAL 610868 non-null values OBJECTID 610868 non-null values TEXT_GENERAL_CODE 610868 non-null values SHAPE 607127 non-null values POINT_X 605731 non-null values POINT_Y 605731 non-null values dtypes: float64(3), int64(4), object(7)
# Columns of our data frame
df.columns
Index([DC_DIST, SECTOR, DISPATCH_DATE_TIME, DISPATCH_DATE, DISPATCH_TIME, HOUR, DC_KEY, LOCATION_BLOCK, UCR_GENERAL, OBJECTID, TEXT_GENERAL_CODE, SHAPE, POINT_X, POINT_Y], dtype=object)
# First entry
df.ix[0]
DC_DIST 14 SECTOR N DISPATCH_DATE_TIME 2010-05-05 11:34:00 DISPATCH_DATE 2010-05-05 DISPATCH_TIME 11:34:00 HOUR 11 DC_KEY 199814043321 LOCATION_BLOCK 300 BLOCK E CLIVEDEN ST UCR_GENERAL 300 OBJECTID 602550 TEXT_GENERAL_CODE Robbery No Firearm SHAPE 602550 POINT_X -75.177 POINT_Y 40.05289 Name: 0
crime = df[['DC_DIST', 'DISPATCH_DATE_TIME', 'LOCATION_BLOCK', 'UCR_GENERAL', 'OBJECTID', 'TEXT_GENERAL_CODE']]
crime
<class 'pandas.core.frame.DataFrame'> Int64Index: 610868 entries, 0 to 610867 Data columns: DC_DIST 610868 non-null values DISPATCH_DATE_TIME 610868 non-null values LOCATION_BLOCK 610868 non-null values UCR_GENERAL 610868 non-null values OBJECTID 610868 non-null values TEXT_GENERAL_CODE 610868 non-null values dtypes: int64(3), object(3)
# Count the different types of crime
crime.TEXT_GENERAL_CODE.value_counts()
Thefts 181307 Theft from Vehicle 100473 Burglary Residential 65700 Recovered Stolen Motor Vehicle 60374 Aggravated Assault No Firearm 46417 Motor Vehicle Theft 45302 Robbery No Firearm 35877 Robbery Firearm 29047 Aggravated Assault Firearm 19478 Burglary Non-Residential 16652 Rape 7076 Homicide - Criminal 1932 Homicide - Gross Negligence 737 Homicide - Criminal 364 Homicide - Justifiable 132
# Homicide - Criminal is listed twice because of a trailing space
# Clean leading and trailing whitespace.
crime.TEXT_GENERAL_CODE = crime.TEXT_GENERAL_CODE.map(lambda x: x.strip())
crime.TEXT_GENERAL_CODE.value_counts()
Thefts 181307 Theft from Vehicle 100473 Burglary Residential 65700 Recovered Stolen Motor Vehicle 60374 Aggravated Assault No Firearm 46417 Motor Vehicle Theft 45302 Robbery No Firearm 35877 Robbery Firearm 29047 Aggravated Assault Firearm 19478 Burglary Non-Residential 16652 Rape 7076 Homicide - Criminal 2296 Homicide - Gross Negligence 737 Homicide - Justifiable 132
# Create a new DataFrame for the 22nd district
dist_22 = crime[crime.DC_DIST == 22]
# Look at the number of distinct crimes in District 22
dist_22['TEXT_GENERAL_CODE'].value_counts()
Thefts 8072 Theft from Vehicle 3851 Burglary Residential 3674 Aggravated Assault No Firearm 2954 Recovered Stolen Motor Vehicle 2503 Robbery No Firearm 1934 Robbery Firearm 1889 Aggravated Assault Firearm 1646 Motor Vehicle Theft 1626 Burglary Non-Residential 705 Rape 468 Homicide - Criminal 225 Homicide - Gross Negligence 23 Homicide - Justifiable 6
# Create a cross tabulation of crime type across all districts
crime_counts = pd.crosstab(crime.DC_DIST, crime.TEXT_GENERAL_CODE)
crime_counts
TEXT_GENERAL_CODE | Aggravated Assault Firearm | Aggravated Assault No Firearm | Burglary Non-Residential | Burglary Residential | Homicide - Criminal | Homicide - Gross Negligence | Homicide - Justifiable | Motor Vehicle Theft | Rape | Recovered Stolen Motor Vehicle | Robbery Firearm | Robbery No Firearm | Theft from Vehicle | Thefts |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
DC_DIST | ||||||||||||||
1 | 538 | 1004 | 302 | 1254 | 51 | 11 | 1 | 887 | 98 | 1002 | 475 | 864 | 1843 | 4471 |
2 | 626 | 1902 | 907 | 4289 | 60 | 50 | 7 | 3063 | 282 | 3058 | 1719 | 1930 | 4744 | 10456 |
3 | 302 | 1274 | 478 | 1679 | 47 | 20 | 2 | 1677 | 111 | 1359 | 745 | 1434 | 4810 | 7952 |
4 | 276 | 871 | 244 | 801 | 34 | 17 | 2 | 655 | 88 | 1217 | 541 | 954 | 2616 | 4449 |
5 | 73 | 459 | 301 | 1278 | 4 | 12 | 1 | 622 | 50 | 353 | 194 | 271 | 2413 | 2880 |
6 | 282 | 1228 | 850 | 1141 | 39 | 24 | 2 | 1437 | 158 | 1137 | 684 | 1560 | 7514 | 15635 |
7 | 129 | 643 | 706 | 2166 | 18 | 43 | 2 | 882 | 91 | 919 | 272 | 543 | 3495 | 4053 |
8 | 224 | 1483 | 870 | 2212 | 25 | 51 | 0 | 1496 | 156 | 1535 | 498 | 843 | 5271 | 8911 |
9 | 178 | 733 | 714 | 1627 | 18 | 17 | 3 | 936 | 144 | 1072 | 729 | 1352 | 6960 | 14593 |
12 | 1660 | 3185 | 752 | 4315 | 200 | 58 | 8 | 3111 | 514 | 4702 | 2010 | 1742 | 4542 | 7001 |
14 | 1179 | 2707 | 811 | 5430 | 140 | 24 | 8 | 2352 | 441 | 3823 | 1810 | 1878 | 5331 | 8349 |
15 | 1429 | 4749 | 1519 | 5783 | 124 | 77 | 10 | 4458 | 609 | 5808 | 2663 | 3664 | 7805 | 13447 |
16 | 738 | 1838 | 398 | 2062 | 114 | 20 | 4 | 1057 | 313 | 1616 | 858 | 1057 | 2471 | 4526 |
17 | 915 | 1738 | 284 | 3056 | 107 | 3 | 6 | 1179 | 219 | 1495 | 853 | 1173 | 2963 | 4575 |
18 | 995 | 2384 | 688 | 3251 | 107 | 25 | 10 | 1722 | 360 | 2135 | 1834 | 2124 | 5079 | 11113 |
19 | 1468 | 3065 | 764 | 4109 | 154 | 26 | 9 | 2160 | 459 | 2755 | 1768 | 1597 | 3763 | 6821 |
22 | 1646 | 2954 | 705 | 3674 | 225 | 23 | 6 | 1626 | 468 | 2503 | 1889 | 1934 | 3851 | 8072 |
23 | 348 | 813 | 249 | 867 | 58 | 9 | 2 | 441 | 137 | 980 | 502 | 599 | 1566 | 2309 |
24 | 1267 | 2907 | 1469 | 4101 | 132 | 49 | 10 | 3601 | 542 | 4806 | 1742 | 2539 | 4655 | 10794 |
25 | 1833 | 3286 | 1273 | 3055 | 214 | 55 | 14 | 3906 | 567 | 7249 | 2375 | 2501 | 4025 | 8504 |
26 | 640 | 1747 | 835 | 2631 | 95 | 35 | 4 | 2290 | 281 | 3112 | 1013 | 1387 | 5751 | 6268 |
35 | 1497 | 3048 | 699 | 3651 | 160 | 34 | 10 | 3428 | 568 | 4641 | 2432 | 2482 | 4058 | 7825 |
39 | 1223 | 2365 | 770 | 3259 | 165 | 41 | 11 | 2193 | 403 | 2715 | 1424 | 1401 | 3842 | 5974 |
77 | 1 | 16 | 17 | 1 | 0 | 2 | 0 | 87 | 5 | 244 | 2 | 6 | 279 | 2221 |
92 | 11 | 18 | 47 | 8 | 5 | 11 | 0 | 36 | 12 | 138 | 15 | 42 | 826 | 108 |
# Normalize types of crime for each district
crime_pct = crime_counts.div(crime_counts.sum(1).astype(float), axis=0)
# Sort by thefts column. This creates a view and does not change the original DataFrame
crime_pct.sort('Thefts')
TEXT_GENERAL_CODE | Aggravated Assault Firearm | Aggravated Assault No Firearm | Burglary Non-Residential | Burglary Residential | Homicide - Criminal | Homicide - Gross Negligence | Homicide - Justifiable | Motor Vehicle Theft | Rape | Recovered Stolen Motor Vehicle | Robbery Firearm | Robbery No Firearm | Theft from Vehicle | Thefts |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
DC_DIST | ||||||||||||||
92 | 0.008614 | 0.014096 | 0.036805 | 0.006265 | 0.003915 | 0.008614 | 0.000000 | 0.028191 | 0.009397 | 0.108066 | 0.011746 | 0.032890 | 0.646829 | 0.084573 |
12 | 0.049112 | 0.094231 | 0.022249 | 0.127663 | 0.005917 | 0.001716 | 0.000237 | 0.092041 | 0.015207 | 0.139112 | 0.059467 | 0.051538 | 0.134379 | 0.207130 |
25 | 0.047173 | 0.084566 | 0.032761 | 0.078622 | 0.005507 | 0.001415 | 0.000360 | 0.100522 | 0.014592 | 0.186556 | 0.061122 | 0.064364 | 0.103585 | 0.218854 |
35 | 0.043350 | 0.088263 | 0.020242 | 0.105725 | 0.004633 | 0.000985 | 0.000290 | 0.099267 | 0.016448 | 0.134393 | 0.070425 | 0.071873 | 0.117511 | 0.226595 |
39 | 0.047429 | 0.091716 | 0.029861 | 0.126386 | 0.006399 | 0.001590 | 0.000427 | 0.085046 | 0.015629 | 0.105290 | 0.055224 | 0.054332 | 0.148996 | 0.231676 |
19 | 0.050764 | 0.105989 | 0.026420 | 0.142091 | 0.005325 | 0.000899 | 0.000311 | 0.074694 | 0.015872 | 0.095269 | 0.061138 | 0.055225 | 0.130127 | 0.235874 |
26 | 0.024531 | 0.066963 | 0.032006 | 0.100847 | 0.003641 | 0.001342 | 0.000153 | 0.087776 | 0.010771 | 0.119284 | 0.038829 | 0.053164 | 0.220438 | 0.240255 |
14 | 0.034390 | 0.078960 | 0.023656 | 0.158388 | 0.004084 | 0.000700 | 0.000233 | 0.068605 | 0.012864 | 0.111513 | 0.052796 | 0.054779 | 0.155500 | 0.243532 |
17 | 0.049284 | 0.093612 | 0.015297 | 0.164602 | 0.005763 | 0.000162 | 0.000323 | 0.063503 | 0.011796 | 0.080524 | 0.045944 | 0.063180 | 0.159593 | 0.246418 |
15 | 0.027404 | 0.091073 | 0.029130 | 0.110902 | 0.002378 | 0.001477 | 0.000192 | 0.085492 | 0.011679 | 0.111382 | 0.051069 | 0.070266 | 0.149679 | 0.257877 |
23 | 0.039189 | 0.091554 | 0.028041 | 0.097635 | 0.006532 | 0.001014 | 0.000225 | 0.049662 | 0.015428 | 0.110360 | 0.056532 | 0.067455 | 0.176351 | 0.260023 |
16 | 0.043229 | 0.107662 | 0.023313 | 0.120783 | 0.006678 | 0.001172 | 0.000234 | 0.061914 | 0.018334 | 0.094658 | 0.050258 | 0.061914 | 0.144740 | 0.265112 |
22 | 0.055653 | 0.099878 | 0.023837 | 0.124222 | 0.007608 | 0.000778 | 0.000203 | 0.054977 | 0.015824 | 0.084629 | 0.063869 | 0.065391 | 0.130207 | 0.272924 |
24 | 0.032812 | 0.075284 | 0.038043 | 0.106205 | 0.003418 | 0.001269 | 0.000259 | 0.093256 | 0.014036 | 0.124463 | 0.045113 | 0.065753 | 0.120552 | 0.279536 |
7 | 0.009239 | 0.046054 | 0.050566 | 0.155135 | 0.001289 | 0.003080 | 0.000143 | 0.063171 | 0.006518 | 0.065822 | 0.019481 | 0.038891 | 0.250322 | 0.290288 |
2 | 0.018916 | 0.057474 | 0.027408 | 0.129604 | 0.001813 | 0.001511 | 0.000212 | 0.092557 | 0.008521 | 0.092406 | 0.051945 | 0.058320 | 0.143354 | 0.315958 |
5 | 0.008192 | 0.051509 | 0.033778 | 0.143418 | 0.000449 | 0.001347 | 0.000112 | 0.069801 | 0.005611 | 0.039614 | 0.021771 | 0.030412 | 0.270789 | 0.323196 |
4 | 0.021622 | 0.068233 | 0.019115 | 0.062750 | 0.002664 | 0.001332 | 0.000157 | 0.051312 | 0.006894 | 0.095339 | 0.042382 | 0.074736 | 0.204935 | 0.348531 |
18 | 0.031263 | 0.074905 | 0.021617 | 0.102146 | 0.003362 | 0.000785 | 0.000314 | 0.054105 | 0.011311 | 0.067081 | 0.057624 | 0.066736 | 0.159581 | 0.349169 |
1 | 0.042028 | 0.078431 | 0.023592 | 0.097961 | 0.003984 | 0.000859 | 0.000078 | 0.069291 | 0.007656 | 0.078275 | 0.037106 | 0.067495 | 0.143973 | 0.349270 |
3 | 0.013796 | 0.058200 | 0.021836 | 0.076702 | 0.002147 | 0.000914 | 0.000091 | 0.076610 | 0.005071 | 0.062083 | 0.034034 | 0.065509 | 0.219735 | 0.363271 |
8 | 0.009502 | 0.062906 | 0.036903 | 0.093828 | 0.001060 | 0.002163 | 0.000000 | 0.063457 | 0.006617 | 0.065111 | 0.021124 | 0.035758 | 0.223584 | 0.377985 |
6 | 0.008898 | 0.038749 | 0.026821 | 0.036004 | 0.001231 | 0.000757 | 0.000063 | 0.045344 | 0.004986 | 0.035878 | 0.021583 | 0.049225 | 0.237102 | 0.493358 |
9 | 0.006122 | 0.025210 | 0.024556 | 0.055957 | 0.000619 | 0.000585 | 0.000103 | 0.032191 | 0.004953 | 0.036869 | 0.025072 | 0.046499 | 0.239373 | 0.501892 |
77 | 0.000347 | 0.005554 | 0.005901 | 0.000347 | 0.000000 | 0.000694 | 0.000000 | 0.030198 | 0.001736 | 0.084693 | 0.000694 | 0.002083 | 0.096841 | 0.770913 |
# Plot normalize crime vs district
colors=['r', 'g', 'b', 'c', 'y', 'w', 'm', 'k', 'burlywood','navy', 'teal', 'LightSteelBlue', 'Honeydew', 'Goldenrod']
p = crime_pct.plot(kind='bar', stacked=True, color=colors)
p.legend(loc=0, bbox_to_anchor=(1,1))
<matplotlib.legend.Legend at 0x10fcb4910>
def code_rename(code):
"""" Lazy consolidattion of crime codes"""
lower_code = code.lower()
new_codes = ['Assault', 'Burglary', 'Homicide', 'Vehicle', 'Robbery', 'Theft']
for new_code in new_codes:
if new_code.lower() in lower_code:
return new_code
return code
# Consolidate crimes
crime.TEXT_GENERAL_CODE = crime.TEXT_GENERAL_CODE.map(code_rename)
crime.TEXT_GENERAL_CODE.unique()
array(['Robbery', 'Theft', 'Vehicle', 'Assault', 'Burglary', 'Homicide', 'Rape'], dtype=object)
simple_crime_count = pd.crosstab(crime.DC_DIST, crime.TEXT_GENERAL_CODE)
# Normalize crime types
crime_pct = simple_crime_count.div(simple_crime_count.sum(1).astype(float), axis=0)
crime_pct
TEXT_GENERAL_CODE | Assault | Burglary | Homicide | Rape | Robbery | Theft | Vehicle |
---|---|---|---|---|---|---|---|
DC_DIST | |||||||
1 | 0.120459 | 0.121553 | 0.004921 | 0.007656 | 0.104601 | 0.349270 | 0.291540 |
2 | 0.076391 | 0.157012 | 0.003535 | 0.008521 | 0.110265 | 0.315958 | 0.328317 |
3 | 0.071996 | 0.098538 | 0.003152 | 0.005071 | 0.099543 | 0.363271 | 0.358429 |
4 | 0.089855 | 0.081864 | 0.004152 | 0.006894 | 0.117117 | 0.348531 | 0.351586 |
5 | 0.059701 | 0.177197 | 0.001908 | 0.005611 | 0.052183 | 0.323196 | 0.380204 |
6 | 0.047648 | 0.062825 | 0.002051 | 0.004986 | 0.070809 | 0.493358 | 0.318324 |
7 | 0.055293 | 0.205701 | 0.004512 | 0.006518 | 0.058373 | 0.290288 | 0.379315 |
8 | 0.072407 | 0.130732 | 0.003224 | 0.006617 | 0.056882 | 0.377985 | 0.352153 |
9 | 0.031332 | 0.080513 | 0.001307 | 0.004953 | 0.071571 | 0.501892 | 0.308433 |
12 | 0.143343 | 0.149911 | 0.007870 | 0.015207 | 0.111006 | 0.207130 | 0.365533 |
14 | 0.113351 | 0.182044 | 0.005017 | 0.012864 | 0.107575 | 0.243532 | 0.335618 |
15 | 0.118477 | 0.140033 | 0.004046 | 0.011679 | 0.121335 | 0.257877 | 0.346553 |
16 | 0.150890 | 0.144096 | 0.008083 | 0.018334 | 0.112172 | 0.265112 | 0.301312 |
17 | 0.142896 | 0.179899 | 0.006248 | 0.011796 | 0.109124 | 0.246418 | 0.303620 |
18 | 0.106168 | 0.123763 | 0.004462 | 0.011311 | 0.124360 | 0.349169 | 0.280768 |
19 | 0.156754 | 0.168511 | 0.006536 | 0.015872 | 0.116364 | 0.235874 | 0.300090 |
22 | 0.155532 | 0.148059 | 0.008588 | 0.015824 | 0.129260 | 0.272924 | 0.269813 |
23 | 0.130743 | 0.125676 | 0.007770 | 0.015428 | 0.123986 | 0.260023 | 0.336374 |
24 | 0.108096 | 0.144248 | 0.004946 | 0.014036 | 0.110867 | 0.279536 | 0.338271 |
25 | 0.131739 | 0.111383 | 0.007283 | 0.014592 | 0.125486 | 0.218854 | 0.390663 |
26 | 0.091494 | 0.132853 | 0.005136 | 0.010771 | 0.091993 | 0.240255 | 0.427498 |
35 | 0.131613 | 0.125966 | 0.005907 | 0.016448 | 0.142299 | 0.226595 | 0.351171 |
39 | 0.139145 | 0.156248 | 0.008415 | 0.015629 | 0.109556 | 0.231676 | 0.339331 |
77 | 0.005901 | 0.006248 | 0.000694 | 0.001736 | 0.002777 | 0.770913 | 0.211732 |
92 | 0.022709 | 0.043070 | 0.012529 | 0.009397 | 0.044636 | 0.084573 | 0.783085 |
plot = crime_pct.plot(kind='bar', stacked=True, color=['r', 'g', 'b', 'c', 'y', 'k', 'm', 'w'])
plot.legend(loc=0, bbox_to_anchor=(1,1))
<matplotlib.legend.Legend at 0x10fdc8490>
df.TEXT_GENERAL_CODE = df.TEXT_GENERAL_CODE.map(code_rename)
ct_date = pd.crosstab(df['DISPATCH_DATE'],df['TEXT_GENERAL_CODE'])
ct_date
<class 'pandas.core.frame.DataFrame'> Index: 2628 entries, 2006-01-01 to 2013-03-12 Data columns: Assault 2628 non-null values Burglary 2628 non-null values Homicide 2628 non-null values Rape 2628 non-null values Robbery 2628 non-null values Theft 2628 non-null values Vehicle 2628 non-null values dtypes: int64(7)
# Set index as a DateTime format
ct_date.index = pd.to_datetime(ct_date.index)
# Date filtering
ct_date.ix['2012-01'].sum()
TEXT_GENERAL_CODE Assault 639 Burglary 1066 Homicide 41 Rape 57 Robbery 652 Theft 2012 Vehicle 1905
year_2012 = ct_date.ix['2012']
# Resample data to monthly periods
year_2012.resample('M', how='mean', kind='period').plot()
legend(loc=0, bbox_to_anchor=(1,1))
<matplotlib.legend.Legend at 0x10fcf2b90>
# Plot of all years available
ct_date.resample('M', how='mean', kind='period').plot()
legend(loc=0, bbox_to_anchor=(1,1))
<matplotlib.legend.Legend at 0x10fcfd4d0>