The Uniform Crime Reports is an FBI program to collect standardized crime statistics across the United States. This notebook collects crime information from states and large cities, and compiles them into two CSV files.
import pandas as pd
import requests
from cStringIO import StringIO
shortnames = {'Murder and nonnegligent manslaughter rate': 'Murder',
'Forcible rape rate': 'Rape',
'Robbery rate': 'Robbery',
'Aggravated assault rate': 'Assault',
'Property crime rate': 'Property',
'Burglary rate': 'Burglary',
'Larceny-theft rate': 'Larceny',
'Motor vehicle theft rate': 'Vehicular'}
def city_data(year):
"""
Grab crime statistics by city, for a single year. Return as a CSV string
"""
url = 'http://www.ucrdatatool.gov/Search/Crime/Local/DownCrimeOneYearofDataLarge.cfm/LocalCrimeOneYearofData.csv'
form = {"CrimeCrossId": "40,136,310,375,381,725,886,910,911,929,945,946,961,971,973,978,1010,1013,1028,1042,1056,1103,1116,1117,1135,1143,1146,1158,1177,1227,1233,1240,1245,1252,1261,1265,1267,1269,1272,1289,1306,1317,1322,1362,1380,1391,1434,1452,1471,1476,1477,1481,1486,1491,1496,1503,1524,1531,1540,1541,1550,1557,1562,1563,1574,1584,1596,1610,1630,1639,1642,1647,1652,1672,1750,1753,1763,1765,1767,1817,1820,1823,1825,1833,1874,1878,1888,22564,1918,1941,1969,2024,2089,2122,2141,2157,2194,2214,2244,2257,2277,2428,2456,2463,2487,2704,2724,2793,2811,2560,2976,2983,3045,3056,3081,3104,3182,3201,3204,3274,3310,3312,3436,3858,4037,4101,4134,4141,4411,4583,4701,4846,5032,5216,5303,5368,5453,5774,5789,5876,6134,6306,6430,6523,6612,6643,6970,22787,6985,7244,7391,7453,7506,7633,7655,7798,7948,8036,8055,8451,8621,8670,8713,8836,9144,9195,9539,9627,9672,9883,10027,10043,10291,10306,10518,10702,11291,11304,11386,11418,11472,11509,11763,11948,11956,12012,11017,11063,12262,12517,12520,12629,12750,12797,13003,13015,13168,13171,13183,13187,13222,13307,13794,13987,14155,14294,14305,14463,14477,14499,14532,15146,15319,15559,15574,15692,15826,15847,15954,15968,16061,16513,17184,17198,17736,17815,17847,18014,18293,18378,18383,18510,18552,18573,18576,18713,18733,18752,18763,18784,18843,18874,18902,18916,18965,19000,19016,19061,19109,19123,19144,19242,19262,19320,19366,19378,19411,19444,19449,19462,19466,19543,19575,19592,19632,19662,19680,19863,19941,20075,20085,20132,20133,20142,20220,20332,20430,20436,20501,20576,20718,20786,20816,20915,20930,20942,20959,21179,21256,21295",
"YearStart": str(year),
"YearEnd": str(year),
"DataType":"1,2,3,4"}
return requests.post(url, data=form).content
def parse_city(year):
"""
Download crime data for 1 year across cities, and parse into a DataFrame
"""
print year,
data = city_data(year)
cols = ['Agency', u'State', u'Population', 'Violent Crime rate',
'Murder and nonnegligent manslaughter rate', 'Forcible rape rate',
'Robbery rate', u'Aggravated assault rate',
'Property crime rate', u'Burglary rate', 'Larceny-theft rate', u'Motor vehicle theft rate']
# seek to first line of data
data = data[data.find('Agency,'):]
df = pd.read_csv(StringIO(data))
df = df[cols] # focus on relevant columns...
df = df[df.Population > 100000] # ...and rows
df = df.rename_axis(shortnames, axis=1) # relabel
df['Year'] = year
return df
city = pd.concat(map(parse_city, range(1985, 2013)), ignore_index=True)
city.to_csv('city_crime.csv', index=False)
1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
city.head(20)
Agency | State | Population | Violent Crime rate | Murder | Rape | Robbery | Assault | Property | Burglary | Larceny | Vehicular | Year | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Abilene Police Dept | TX | 111317 | 318.9 | 7.2 | 32.3 | 86.2 | 193.1 | 5530.2 | 1458.0 | 3697.5 | 374.6 | 1985 |
1 | Akron City Police Dept | OH | 226704 | 826.2 | 7.5 | 69.7 | 225.4 | 523.6 | 5849.5 | 1410.2 | 4025.5 | 413.8 | 1985 |
2 | Albuquerque Police Dept | NM | 357051 | 1149.7 | 11.8 | 66.7 | 349.2 | 722.0 | 8136.9 | 2572.7 | 5023.1 | 541.1 | 1985 |
3 | Alexandria Police Dept | VA | 108346 | 740.2 | 6.5 | 48.0 | 392.3 | 293.5 | 6339.0 | 1343.8 | 4378.6 | 616.5 | 1985 |
4 | Allentown City Police Dept | PA | 103483 | 382.7 | 3.9 | 45.4 | 184.6 | 148.8 | 5014.4 | 1331.6 | 3426.6 | 256.1 | 1985 |
5 | Amarillo Police Dept | TX | 166747 | 455.2 | 10.2 | 35.4 | 97.2 | 312.4 | 6047.5 | 1612.6 | 4116.4 | 318.4 | 1985 |
6 | Amherst Town Police Dept | NY | 108570 | 210.9 | 0.9 | 2.8 | 38.7 | 168.6 | 2441.7 | 446.7 | 1760.2 | 234.9 | 1985 |
7 | Anaheim Police Dept | CA | 240283 | 483.6 | 6.7 | 44.5 | 250.1 | 182.3 | 6829.0 | 2147.5 | 3971.6 | 710.0 | 1985 |
8 | Anchorage Police Dept | AK | 229579 | 515.7 | 6.1 | 84.1 | 161.6 | 264.0 | 5853.3 | 1259.7 | 3987.3 | 606.3 | 1985 |
9 | Ann Arbor Police Dept | MI | 107832 | 588.9 | 3.7 | 34.3 | 132.6 | 418.2 | 7936.4 | 1448.5 | 6025.1 | 462.8 | 1985 |
10 | Arlington Police Dept | TX | 218931 | 507.0 | 8.2 | 48.0 | 136.6 | 314.3 | 8461.1 | 1599.6 | 6065.8 | 795.7 | 1985 |
11 | Atlanta Police Dept | GA | 436214 | 2661.5 | 33.2 | 156.6 | 1098.5 | 1373.2 | 10521.2 | 2795.0 | 6733.4 | 992.9 | 1985 |
12 | Aurora Police Dept | CO | 198033 | 1136.2 | 5.0 | 44.9 | 219.2 | 867.0 | 7400.8 | 2000.2 | 4916.9 | 483.8 | 1985 |
13 | Austin Police Dept | TX | 406469 | 581.3 | 12.8 | 120.3 | 264.7 | 183.5 | 9024.3 | 2524.4 | 5980.3 | 519.6 | 1985 |
14 | Bakersfield Police Dept | CA | 133983 | 897.1 | 12.7 | 51.5 | 447.1 | 385.9 | 8746.6 | 2878.0 | 5241.0 | 627.7 | 1985 |
15 | Baltimore City Police Dept | MD | 771097 | 2009.9 | 27.6 | 76.8 | 1007.8 | 897.7 | 6565.1 | 1799.0 | 3985.5 | 780.6 | 1985 |
16 | Baton Rouge Police Dept | LA | 245830 | 1761.8 | 14.6 | 51.3 | 395.0 | 1300.9 | 10962.0 | 3247.4 | 6729.9 | 984.8 | 1985 |
17 | Beaumont Police Dept | TX | 126298 | 772.0 | 8.7 | 103.7 | 269.2 | 390.3 | 6500.5 | 2323.9 | 3669.9 | 506.7 | 1985 |
18 | Berkeley Police Dept | CA | 106768 | 1090.2 | 10.3 | 40.3 | 618.2 | 421.5 | 11272.1 | 2744.3 | 7713.9 | 813.9 | 1985 |
19 | Birmingham Police Dept | AL | 281973 | 1188.1 | 34.4 | 100.4 | 508.9 | 544.4 | 8768.9 | 2543.9 | 4979.9 | 1245.2 | 1985 |
url = 'http://www.ucrdatatool.gov/Search/Crime/State/DownCrimeStatebyState.cfm/CrimeStatebyState.csv'
form = {"StateId":"52,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51",
"YearStart":"1960",
"YearEnd":"2012",
"DataType":"1,2,3,4"}
def parse_state(txt):
cols = ['Year', 'Population',
'Violent Crime rate',
'Murder and nonnegligent manslaughter rate',
'Forcible rape rate',
'Robbery rate',
'Aggravated assault rate',
'Property crime rate',
'Burglary rate',
'Larceny-theft rate',
'Motor vehicle theft rate']
state = txt.split('\n')[0].strip()
df = pd.read_csv(StringIO(txt), skiprows=2)
df = df[cols]
df['State'] = state
df = df.rename_axis(shortnames, axis=1)
df = df[df.Population.convert_objects(convert_numeric=True) > 0] # remove notes at end of document
return df
data = requests.post(url, data=form).content
data = data.split('Estimated crime in')[1:]
states = pd.concat(map(parse_state, data), ignore_index=True)
states.to_csv('state_crime.csv', index=False)
states.tail(20)
Year | Population | Violent Crime rate | Murder | Rape | Robbery | Assault | Property | Burglary | Larceny | Vehicular | State | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2731 | 1993 | 257782608 | 747.1 | 9.5 | 41.1 | 256.0 | 440.5 | 4740.0 | 1099.7 | 3033.9 | 606.3 | United States-Total |
2732 | 1994 | 260327021 | 713.6 | 9.0 | 39.3 | 237.8 | 427.6 | 4660.2 | 1042.1 | 3026.9 | 591.3 | United States-Total |
2733 | 1995 | 262803276 | 684.5 | 8.2 | 37.1 | 220.9 | 418.3 | 4590.5 | 987.0 | 3043.2 | 560.3 | United States-Total |
2734 | 1996 | 265228572 | 636.6 | 7.4 | 36.3 | 201.9 | 391.0 | 4451.0 | 945.0 | 2980.3 | 525.7 | United States-Total |
2735 | 1997 | 267783607 | 611.0 | 6.8 | 35.9 | 186.2 | 382.1 | 4316.3 | 918.8 | 2891.8 | 505.7 | United States-Total |
2736 | 1998 | 270248003 | 567.6 | 6.3 | 34.5 | 165.5 | 361.4 | 4052.5 | 863.2 | 2729.5 | 459.9 | United States-Total |
2737 | 1999 | 272690813 | 523.0 | 5.7 | 32.8 | 150.1 | 334.3 | 3743.6 | 770.4 | 2550.7 | 422.5 | United States-Total |
2738 | 2000 | 281421906 | 506.5 | 5.5 | 32.0 | 145.0 | 324.0 | 3618.3 | 728.8 | 2477.3 | 412.2 | United States-Total |
2739 | 2001 | 285317559 | 504.5 | 5.6 | 31.8 | 148.5 | 318.6 | 3658.1 | 741.8 | 2485.7 | 430.5 | United States-Total |
2740 | 2002 | 287973924 | 494.4 | 5.6 | 33.1 | 146.1 | 309.5 | 3630.6 | 747.0 | 2450.7 | 432.9 | United States-Total |
2741 | 2003 | 290788976 | 475.8 | 5.7 | 32.3 | 142.5 | 295.4 | 3591.2 | 741.0 | 2416.5 | 433.7 | United States-Total |
2742 | 2004 | 293656842 | 463.2 | 5.5 | 32.4 | 136.7 | 288.6 | 3514.1 | 730.3 | 2362.3 | 421.5 | United States-Total |
2743 | 2005 | 296507061 | 469.0 | 5.6 | 31.8 | 140.8 | 290.8 | 3431.5 | 726.9 | 2287.8 | 416.8 | United States-Total |
2744 | 2006 | 299398484 | 479.3 | 5.8 | 31.6 | 150.0 | 292.0 | 3346.6 | 733.1 | 2213.2 | 400.2 | United States-Total |
2745 | 2007 | 301621157 | 471.8 | 5.7 | 30.6 | 148.3 | 287.2 | 3276.4 | 726.1 | 2185.4 | 364.9 | United States-Total |
2746 | 2008 | 304059724 | 458.6 | 5.4 | 29.8 | 145.9 | 277.5 | 3214.6 | 733.0 | 2166.1 | 315.4 | United States-Total |
2747 | 2009 | 307006550 | 431.9 | 5.0 | 29.1 | 133.1 | 264.7 | 3041.3 | 717.7 | 2064.5 | 259.2 | United States-Total |
2748 | 2010 | 309330219 | 404.5 | 4.8 | 27.7 | 119.3 | 252.8 | 2945.9 | 701.0 | 2005.8 | 239.1 | United States-Total |
2749 | 2011 | 311587816 | 387.1 | 4.7 | 27.0 | 113.9 | 241.5 | 2905.4 | 701.3 | 1974.1 | 230.0 | United States-Total |
2750 | 2012 | 313914040 | 386.9 | 4.7 | 26.9 | 112.9 | 242.3 | 2859.2 | 670.2 | 1959.3 | 229.7 | United States-Total |