%matplotlib inline
import pandas as pd
import datetime as dt
base = '/home/ovis/cr/dvto'
df = pd.read_csv("%s/Parking_Tags_Data_2012.csv" % base)
df
tag_number_masked | date_of_infraction | infraction_code | infraction_description | set_fine_amount | time_of_infraction | location1 | location2 | location3 | location4 | province | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | ***78746 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 0 | NR | 355 PARKSIDE DR | NaN | NaN | ON |
1 | ***31670 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 1 | NR | 220 KING ST W | NaN | NaN | ON |
2 | ***47870 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 1 | N/S | ELM ST | W/O | ELIZABETH ST | ON |
3 | ***68489 | 20120101 | 8 | STD VEH HWY PROHIB TIME/DAY | 60 | 1 | N/S | WALTON ST | E/O | BAY ST | ON |
4 | ***98802 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 1 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
5 | ***85083 | 20120101 | 8 | STD VEH HWY PROHIB TIME/DAY | 60 | 2 | N/S | WALTON ST | E/O | BAY ST | ON |
6 | ***98803 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 2 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
7 | ***08841 | 20120101 | 15 | PARK - 3 M OF FIRE HYDRANT | 100 | 2 | NR | 35 THORNCLIFFE PARK DR | NaN | NaN | NJ |
8 | ***47871 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 3 | N/S | ELM ST | E/O | UNIVERSITY AVE | ON |
9 | ***98804 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 3 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
10 | ***98805 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 3 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
11 | ***99287 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 3 | NR | 968 QUEEN ST W | NaN | NaN | ON |
12 | ***47872 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 4 | N/S | ELM ST | E/O | UNIVERSITY AVE | ON |
13 | ***98806 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 4 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
14 | ***13375 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 4 | NR | 193 PARKSIDE DR | NaN | NaN | UT |
15 | ***13376 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 5 | NR | 193 PARKSIDE DR | NaN | NaN | ON |
16 | ***13377 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 5 | NR | 193 PARKSIDE DR | NaN | NaN | ON |
17 | ***98807 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 6 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
18 | ***68490 | 20120101 | 384 | STOP VEH OTR THN BCYCL-BYCL LN | 60 | 7 | OPP | 86 GERRARD ST E | NaN | NaN | ON |
19 | ***85084 | 20120101 | 384 | STOP VEH OTR THN BCYCL-BYCL LN | 60 | 8 | OPP | 86 GERRARD ST E | NaN | NaN | ON |
20 | ***98808 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 8 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
21 | ***68491 | 20120101 | 384 | STOP VEH OTR THN BCYCL-BYCL LN | 60 | 9 | NR | 101 GERRARD ST E | NaN | NaN | GA |
22 | ***86155 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 9 | AT | 361 FRONT ST W | NaN | NaN | ON |
23 | ***98809 | 20120101 | 337 | PARK - ON BOULEVARD | 50 | 9 | S/S | SHEPPARD AVE E | E/O | NEILSON AVE | ON |
24 | ***99288 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 9 | NR | 942 QUEEN ST W | NaN | NaN | ON |
25 | ***47873 | 20120101 | 9 | STOP HWY PROHIBITED TIME/DAY | 60 | 10 | W/S | ELIZABETH ST | N/O | EDWARD ST | ON |
26 | ***47874 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 12 | NR | 150 ELIZABETH ST | NaN | NaN | ON |
27 | ***68492 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 14 | W/S | PARLIAMENT ST | S/O | VERNER LANE | ON |
28 | ***99289 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 14 | NR | 723 QUEEN ST W | NaN | NaN | ON |
29 | ***47875 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 16 | OPP | 77 ELM ST | NaN | NaN | ON |
30 | ***47876 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 17 | OPP | 77 ELM ST | NaN | NaN | ON |
31 | ***86156 | 20120101 | 15 | PARK 3M OF FIRE HYDRANT | 100 | 20 | AT | 361 FRONT ST W | NaN | NaN | ON |
32 | ***99290 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 22 | NR | 801 KING ST W | NaN | NaN | ON |
33 | ***08842 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 22 | AT | 177 REDPATH AVE | NaN | NaN | ON |
34 | ***08843 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 23 | AT | 177 REDPATH AVE | NaN | NaN | ON |
35 | ***47877 | 20120101 | 15 | PARK 3M OF FIRE HYDRANT | 100 | 25 | NR | 13 ELM ST | NaN | NaN | ON |
36 | ***99291 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 26 | NR | 901 KING ST W | NaN | NaN | ON |
37 | ***08844 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 26 | AT | 200 ROEHAMPTON AVE | NaN | NaN | ON |
38 | ***08845 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 26 | AT | 200 ROEHAMPTON AVE | NaN | NaN | ON |
39 | ***98810 | 20120101 | 14 | PARK OBSTRUCT DRIVE/LANE | 40 | 28 | NR | 11 PROTEA GDNS | NaN | NaN | ON |
40 | ***08846 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 28 | AT | 200 ROEHAMPTON AVE | NaN | NaN | ON |
41 | ***08847 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 31 | AT | 200 ROEHAMPTON AVE | NaN | NaN | ON |
42 | ***94096 | 20120101 | 192 | STAND SIGNED TRANSIT STOP | 60 | 37 | NR | 922 PAPE AVE | NaN | NaN | ON |
43 | ***99292 | 20120101 | 5 | PARK-HWY DRNG PROH TIMES/DAYS | 40 | 39 | NR | 1366 DUNDAS ST W | NaN | NaN | ON |
44 | ***47878 | 20120101 | 369 | STAND STR DISABL LDG NO DROPOF | 450 | 41 | NR | 32 GRENVILLE ST | NaN | NaN | ON |
45 | ***47879 | 20120101 | 369 | STAND STR DISABL LDG NO DROPOF | 450 | 43 | NR | 18 GRENVILLE ST | NaN | NaN | ON |
46 | ***47880 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 47 | NR | 7 BREADALBANE ST | NaN | NaN | ON |
47 | ***37886 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 48 | AT | 735 COLLEGE ST | NaN | NaN | ON |
48 | ***37887 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 49 | AT | 735 COLLEGE ST | NaN | NaN | ON |
49 | ***86157 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 49 | AT | 37 GROSVENOR ST | NaN | NaN | ON |
50 | ***08848 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 50 | E/S | MOUNT PLEASANT RD | S/O | GARFIELD AVE | ON |
51 | ***54745 | 20120101 | 3 | PARK/LEAVE ON PRIVATE PROPERTY | 30 | 51 | AT | 1 SCOTT ST | NaN | NaN | ON |
52 | ***37888 | 20120101 | 30 | STOP ON/OVER SIDEWALK/FOOTPATH | 60 | 52 | NR | 178 BEATRICE ST | NaN | NaN | ON |
53 | ***44968 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 55 | OPP | 168 SIMCOE ST | NaN | NaN | ON |
54 | ***47881 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 56 | NR | 27 CARLTON ST | NaN | NaN | ON |
55 | ***47882 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 58 | NR | 27 CARLTON ST | NaN | NaN | ON |
56 | ***47883 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 59 | NR | 27 CARLTON ST | NaN | NaN | ON |
57 | ***44969 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 103 | NR | 100 SIMCOE ST | NaN | NaN | ON |
58 | ***09737 | 20120101 | 356 | STAND IN DISABLED NO PERMIT | 450 | 110 | AT | 11 BRUNEL CT | NaN | NaN | ON |
59 | ***47884 | 20120101 | 9 | STOP HWY PROHIBITED TIME/DAY | 60 | 111 | OPP | 86 GERRARD ST E | NaN | NaN | ON |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2746154 rows × 11 columns
df.dtypes
tag_number_masked object date_of_infraction int64 infraction_code int64 infraction_description object set_fine_amount int64 time_of_infraction float64 location1 object location2 object location3 object location4 object province object dtype: object
pd.isnull(df['date_of_infraction']).value_counts()
False 2746154 dtype: int64
date_format = '%Y%m%d'
def create_weekday(x):
d = dt.datetime.strptime(str(x['date_of_infraction']), date_format)
return d.strftime('%A')
df['weekday'] = df.apply(create_weekday, axis=1)
weekday_counts = df['weekday'].value_counts()
weekday_counts
Thursday 432138 Wednesday 430728 Friday 428295 Tuesday 424923 Monday 379130 Saturday 368239 Sunday 282701 dtype: int64
weekdays = pd.DataFrame({ 'weekday': weekday_counts.keys(), 'count': weekday_counts.values })
weekdays
count | weekday | |
---|---|---|
0 | 432138 | Thursday |
1 | 430728 | Wednesday |
2 | 428295 | Friday |
3 | 424923 | Tuesday |
4 | 379130 | Monday |
5 | 368239 | Saturday |
6 | 282701 | Sunday |
7 rows × 2 columns
weekdays = weekdays.reindex_axis(sorted(weekdays.columns, reverse=True), axis=1)
weekdays
weekday | count | |
---|---|---|
0 | Thursday | 432138 |
1 | Wednesday | 430728 |
2 | Friday | 428295 |
3 | Tuesday | 424923 |
4 | Monday | 379130 |
5 | Saturday | 368239 |
6 | Sunday | 282701 |
7 rows × 2 columns
weekdays.to_csv("%s/weekdays.csv" % base, index=False)
fine_counts = df['set_fine_amount'].value_counts()
fine_counts
30 1549135 40 608474 60 358798 15 125582 100 39852 250 21410 50 18215 450 12257 105 9662 90 1365 300 1125 55 157 150 81 0 35 200 6 dtype: int64
fines = pd.DataFrame({ 'amount': fine_counts.keys(), 'count': fine_counts.values })
fines
amount | count | |
---|---|---|
0 | 30 | 1549135 |
1 | 40 | 608474 |
2 | 60 | 358798 |
3 | 15 | 125582 |
4 | 100 | 39852 |
5 | 250 | 21410 |
6 | 50 | 18215 |
7 | 450 | 12257 |
8 | 105 | 9662 |
9 | 90 | 1365 |
10 | 300 | 1125 |
11 | 55 | 157 |
12 | 150 | 81 |
13 | 0 | 35 |
14 | 200 | 6 |
15 rows × 2 columns
fines.to_csv("%s/fines.csv" % base, index=False)
province_counts = df['province'].value_counts()
province_counts
ON 2612317 QC 36620 AB 12267 NY 10919 BC 7019 NS 6008 FL 5464 MI 5008 MB 3909 NB 3459 AZ 3412 CA 3235 IL 2734 PA 2676 MD 2505 ... XX 78 NT 73 NE 63 MS 61 AK 49 NM 48 ND 41 DC 35 ID 34 SD 32 NU 23 WY 12 HI 11 VI 2 PW 1 Length: 68, dtype: int64
top_provinces = province_counts[province_counts > province_counts[9]].keys()
top_provinces
Index([u'ON', u'QC', u'AB', u'NY', u'BC', u'NS', u'FL', u'MI', u'MB'], dtype='object')
top_provinces = map(lambda x: str(x), top_provinces)
top_provinces
['ON', 'QC', 'AB', 'NY', 'BC', 'NS', 'FL', 'MI', 'MB']
def create_trimmed_provinces(x):
if x['province'] in top_provinces:
return x['province']
else:
return 'Other'
df['trimmed_provinces'] = df.apply(create_trimmed_provinces, axis=1)
trimmed_provinces = df['trimmed_provinces'].value_counts()
trimmed_provinces
ON 2612317 Other 46623 QC 36620 AB 12267 NY 10919 BC 7019 NS 6008 FL 5464 MI 5008 MB 3909 dtype: int64
provinces = pd.DataFrame({ 'province': trimmed_provinces.keys(), 'count': trimmed_provinces.values })
provinces
count | province | |
---|---|---|
0 | 2612317 | ON |
1 | 46623 | Other |
2 | 36620 | QC |
3 | 12267 | AB |
4 | 10919 | NY |
5 | 7019 | BC |
6 | 6008 | NS |
7 | 5464 | FL |
8 | 5008 | MI |
9 | 3909 | MB |
10 rows × 2 columns
provinces = provinces.reindex_axis(sorted(provinces.columns, reverse=True), axis=1)
provinces
province | count | |
---|---|---|
0 | ON | 2612317 |
1 | Other | 46623 |
2 | QC | 36620 |
3 | AB | 12267 |
4 | NY | 10919 |
5 | BC | 7019 |
6 | NS | 6008 |
7 | FL | 5464 |
8 | MI | 5008 |
9 | MB | 3909 |
10 rows × 2 columns
provinces.to_csv("%s/provinces.csv" % base, index=False)
infraction_counts = df['infraction_description'].value_counts()
infraction_counts
PARK PROHIBITED TIME NO PERMIT 428459 PARK HWY PROHIBED TIME/DAY 370808 PARK FAIL TO DISPLAY RECEIPT 341828 PARK/LEAVE ON PRIVATE PROPERTY 332709 PARK FAIL TO DEP. FEE MACHINE 226683 PARK-HWY DRNG PROH TIMES/DAYS 104625 STOP HWY PROHIBITED TIME/DAY 104556 PARK HWY IN EXCESS PRMTD TIME 78527 PARK LONGER THAN 3 HOURS 76466 STD VEH HWY PROHIB TIME/DAY 69418 STOP-HWY-PROHIBITED TIMES/DAYS 64990 PARK HWY AT PROHIB TIME/DAY 42379 STAND VEH-HWY-PROH TIME/DAYS 35148 PARK ON 2AM-6AM DEC 1-MAR 31 34720 PARK OVER 3 HOURS 32540 ... LEAVE OVERNIGHT WITHOUT PERMIT 1 STOP-ON ELEVATED STRUCTURE 1 PARK-HWY-INTFR FORM FUNRL PROC 1 PARK-HWY WITHIN TURING BASIN 1 PARK SIGNALZD XWALK 15M SM SDE 1 PARK-FRONT OF VEH NOT OPP MTR 1 PARK AUTOMOBILE - IN SPACE 1 FAIL TO PARK PARA LEFT CURB 1 STAND VEHICLE SIGNED HIGHWAY 2 1 ANGLE PARK-METERED SPACE-FRONT 1 PARK/STOP/STAND FOR VENDING 1 PARK PRO VEH ON PEDESTRIAN WAY 1 PARK - METER NOT USED 1 PARK IN 30.5M SIGNAL X WALK 1 PARK ON-STREET HANDICAP SPACE 1 Length: 302, dtype: int64
infraction_counts = df['infraction_code'].value_counts()
infraction_counts
5 569772 29 428476 210 341828 3 332757 207 226683 9 184219 2 114809 8 110473 6 102709 15 39711 28 34720 30 25447 134 24246 347 21410 312 19502 ... 129 1 34 1 115 1 90 1 89 1 66 1 54 1 52 1 51 1 45 1 44 1 43 1 36 1 35 1 144 1 Length: 161, dtype: int64
df[df['infraction_code'] == 5].head()
tag_number_masked | date_of_infraction | infraction_code | infraction_description | set_fine_amount | time_of_infraction | location1 | location2 | location3 | location4 | province | weekday | trimmed_provinces | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | ***31670 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 1 | NR | 220 KING ST W | NaN | NaN | ON | Sunday | ON |
2 | ***47870 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 1 | N/S | ELM ST | W/O | ELIZABETH ST | ON | Sunday | ON |
8 | ***47871 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 3 | N/S | ELM ST | E/O | UNIVERSITY AVE | ON | Sunday | ON |
11 | ***99287 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 3 | NR | 968 QUEEN ST W | NaN | NaN | ON | Sunday | ON |
12 | ***47872 | 20120101 | 5 | PARK HWY PROHIBED TIME/DAY | 40 | 4 | N/S | ELM ST | E/O | UNIVERSITY AVE | ON | Sunday | ON |
5 rows × 13 columns
descriptions = {}
def create_sane_infraction_description(x):
code = x['infraction_code']
if code not in descriptions:
descriptions[code] = x['infraction_description']
return descriptions[code]
df['sane_infraction_description'] = df.apply(create_sane_infraction_description, axis=1)
infraction_counts = df['sane_infraction_description'].value_counts()
infraction_counts
PARK HWY PROHIBED TIME/DAY 569772 PARK PROHIBITED TIME NO PERMIT 428476 PARK FAIL TO DISPLAY RECEIPT 341828 PARK/LEAVE ON PRIVATE PROPERTY 332757 PARK FAIL TO DEP. FEE MACHINE 226683 STOP HWY PROHIBITED TIME/DAY 184219 PARK OVER 3 HOURS 114809 STD VEH HWY PROHIB TIME/DAY 110473 PARK HWY OVER PERMITTED TIME 102709 PARK - 3 M OF FIRE HYDRANT 39711 PARK ON 2AM-6AM DEC 1-MAR 31 34720 STOP ON/OVER SIDEWALK/FOOTPATH 25447 PARK IN PUBLIC LANE 24246 PARK IN A FIRE ROUTE 21410 PARKING MACH-NOT USED/NO FEE 19502 ... PARK METERED SPACE PARK LOT 1 STAND VEHICLE-SIGNED HIGHWAY-3 1 STAND VEHICLE-SIGNED HIGHWAY-W 1 PARK AUTOMOBILE - IN SPACE 1 PARK ON-STREET HANDICAP SPACE 1 STAND VEH HWY 30.5 BUS STOP 1 PARK/STOP/STAND FOR VENDING 1 PARK - METER NOT USED 1 STAND VEHICLE SIGNED HIGHWAY 2 1 PARK/STOP OVER 30CM FROM CURB 1 PARK MOTORCYCLE - IN SPACE 1 FAIL ANGLE PARK/STOP-DSGNTD SP 1 STOP-ON ELEVATED STRUCTURE 1 STOP PROH TIME MAITLAND ST. 1 ANGLE PARK-METERED SPACE-FRONT 1 Length: 152, dtype: int64
infractions = pd.DataFrame({ 'infraction': infraction_counts.keys(), 'count': infraction_counts.values })
infractions = infractions.reindex_axis(sorted(infractions.columns, reverse=True), axis=1)
infractions
infraction | count | |
---|---|---|
0 | PARK HWY PROHIBED TIME/DAY | 569772 |
1 | PARK PROHIBITED TIME NO PERMIT | 428476 |
2 | PARK FAIL TO DISPLAY RECEIPT | 341828 |
3 | PARK/LEAVE ON PRIVATE PROPERTY | 332757 |
4 | PARK FAIL TO DEP. FEE MACHINE | 226683 |
5 | STOP HWY PROHIBITED TIME/DAY | 184219 |
6 | PARK OVER 3 HOURS | 114809 |
7 | STD VEH HWY PROHIB TIME/DAY | 110473 |
8 | PARK HWY OVER PERMITTED TIME | 102709 |
9 | PARK - 3 M OF FIRE HYDRANT | 39711 |
10 | PARK ON 2AM-6AM DEC 1-MAR 31 | 34720 |
11 | STOP ON/OVER SIDEWALK/FOOTPATH | 25447 |
12 | PARK IN PUBLIC LANE | 24246 |
13 | PARK IN A FIRE ROUTE | 21410 |
14 | PARKING MACH-NOT USED/NO FEE | 19502 |
15 | STAND SIGNED TRANSIT STOP | 18418 |
16 | PARK - ON BOULEVARD | 16386 |
17 | PARK-PASSENGER/FREIGHT LOADING | 16358 |
18 | PARK 9M OF INTERSCTING HIGHWAY | 16032 |
19 | STOP VEH OTR THN BCYCL-BYCL LN | 12955 |
20 | PARK/LEAVE ON MUNICIPAL PRPTY | 10596 |
21 | PARK-FAIL TO DISPLAY PERMIT | 7498 |
22 | PARK FAIL TO DEPOSIT FEE METER | 6287 |
23 | STAND SIGNED TAXICAB STAND | 6037 |
24 | PARK IN PARK NOT IN DESIG AREA | 5999 |
25 | FAIL TO PARK/STOP PARA TO CURB | 5738 |
26 | PARK - NOT WITHIN PERIOD | 4881 |
27 | PARK OBSTRUCT DRIVE/LANE | 4478 |
28 | FAIL TO PARK/STOP PARA RTHD HW | 4376 |
29 | PARK IN DISABLED NO PERMIT | 3888 |
30 | PARK CONTRARY POSTED CONDITION | 3222 |
31 | STAND ON-ST DISABL LDG NO PRMT | 2958 |
32 | PARK ON-STRT DISABLD NO PERMIT | 2806 |
33 | STOP SIDE STOPPED/PARKED VEH | 2607 |
34 | STOP WITHIN 9M OF CROSSWALK | 1964 |
35 | PARK - BETWEEN ROAD & SIDEWALK | 1761 |
36 | PARK VEHICLE IN CAR-SHARE AREA | 1469 |
37 | PARK TRUCK HWY NO PERM TIME/DA | 1365 |
38 | PARK VEHICLE OUTSIDE OF SPACE | 1220 |
39 | STAND STR DISABL LDG NO DROPOF | 991 |
40 | PARK HWY WITHN T-TYPE INTERSCT | 786 |
41 | STAND BUS PROHIBITED TIME | 710 |
42 | PARK VEHICLE ON ROADWAY | 518 |
43 | PARK 60CM OF DRIVEWAY/LANEWAY | 485 |
44 | PARK VEHICLE WHILE NOT IN PARK | 424 |
45 | STAND ON-STRT DISABL NO PERMIT | 398 |
46 | PARK/LEAVE CONTRARY TO SIGN | 383 |
47 | PARK IN DISABLED NOT DROP OFF | 354 |
48 | PARK VEHICLE-FOR SALE | 299 |
49 | PARK/STOP OVER 30 CM FROM CURB | 277 |
50 | PARK BUS PROHIBITED TIME | 266 |
51 | PARK ON HWY UNDER 6M WIDE | 254 |
52 | STAND DISABLD LDG NOT DROP OFF | 243 |
53 | STOP ADJACENT TO CENTRE BLVD | 228 |
54 | STAND IN DISABLED NOT DROP OFF | 212 |
55 | STOP IN UNDERPASS | 189 |
56 | PARK-HWY 15M TRM DEAD END ST. | 185 |
57 | STOP WITHIN INTERSECTION/XWALK | 177 |
58 | STOP/PARK/LEAVE/STAND CLSD ST | 165 |
59 | STAND VEHICLE WHERE PROHIBITED | 155 |
... | ... |
152 rows × 2 columns
infractions.to_csv("%s/infractions.csv" % base, index=False)
pd.isnull(df['time_of_infraction']).value_counts()
False 2744426 True 1728 dtype: int64
time_of_infraction = df['time_of_infraction']
time_of_infraction
0 0 1 1 2 1 3 1 4 1 5 2 6 2 7 2 8 3 9 3 10 3 11 3 12 4 13 4 14 4 ... 2746139 2328 2746140 2336 2746141 2337 2746142 2338 2746143 2338 2746144 2340 2746145 2341 2746146 2342 2746147 2342 2746148 2346 2746149 2347 2746150 2358 2746151 2358 2746152 2359 2746153 NaN Name: time_of_infraction, Length: 2746154, dtype: float64
time_of_infraction.value_counts()
1605 5297 1607 4567 1606 4442 1608 4116 1610 4095 1535 4087 1609 4056 1135 4050 1310 4005 1305 3981 1210 3902 1230 3897 1145 3888 1220 3888 1307 3883 ... 601 100 608 99 606 99 558 97 613 97 607 96 609 96 559 95 617 95 611 95 616 95 603 91 604 86 990 1 290 1 Length: 1442, dtype: int64
time_of_infraction[time_of_infraction % 100 > 59]
772853 990 2168884 290 Name: time_of_infraction, dtype: float64
time_of_infraction = time_of_infraction[time_of_infraction % 100 <= 59]
time_counts = time_of_infraction.value_counts()
time_counts
1605 5297 1607 4567 1606 4442 1608 4116 1610 4095 1535 4087 1609 4056 1135 4050 1310 4005 1305 3981 1210 3902 1230 3897 1220 3888 1145 3888 1307 3883 ... 557 103 614 101 601 100 608 99 606 99 558 97 613 97 609 96 607 96 616 95 617 95 611 95 559 95 603 91 604 86 Length: 1440, dtype: int64
time_counts = time_of_infraction.value_counts(sort=False)
time_counts
0 584 1 907 2 1074 3 1366 4 1581 5 2869 6 3137 7 3264 8 3166 9 2997 10 3184 11 3055 12 2875 13 2825 14 2706 ... 2345 827 2346 801 2347 829 2348 825 2349 767 2350 856 2351 758 2352 747 2353 757 2354 701 2355 746 2356 703 2357 619 2358 647 2359 617 Length: 1440, dtype: int64
times = pd.DataFrame({ 'time': time_counts.keys(), 'count': time_counts.values })
times = times.reindex_axis(sorted(times.columns, reverse=True), axis=1)
times
time | count | |
---|---|---|
0 | 0 | 584 |
1 | 1 | 907 |
2 | 2 | 1074 |
3 | 3 | 1366 |
4 | 4 | 1581 |
5 | 5 | 2869 |
6 | 6 | 3137 |
7 | 7 | 3264 |
8 | 8 | 3166 |
9 | 9 | 2997 |
10 | 10 | 3184 |
11 | 11 | 3055 |
12 | 12 | 2875 |
13 | 13 | 2825 |
14 | 14 | 2706 |
15 | 15 | 2836 |
16 | 16 | 2788 |
17 | 17 | 2755 |
18 | 18 | 2663 |
19 | 19 | 2635 |
20 | 20 | 2755 |
21 | 21 | 2573 |
22 | 22 | 2579 |
23 | 23 | 2586 |
24 | 24 | 2456 |
25 | 25 | 2492 |
26 | 26 | 2458 |
27 | 27 | 2419 |
28 | 28 | 2376 |
29 | 29 | 2333 |
30 | 30 | 2477 |
31 | 31 | 2318 |
32 | 32 | 2264 |
33 | 33 | 2236 |
34 | 34 | 2130 |
35 | 35 | 2213 |
36 | 36 | 2203 |
37 | 37 | 2040 |
38 | 38 | 2047 |
39 | 39 | 2056 |
40 | 40 | 2108 |
41 | 41 | 2049 |
42 | 42 | 2021 |
43 | 43 | 1986 |
44 | 44 | 1954 |
45 | 45 | 2019 |
46 | 46 | 1969 |
47 | 47 | 1884 |
48 | 48 | 1926 |
49 | 49 | 1829 |
50 | 50 | 1901 |
51 | 51 | 1829 |
52 | 52 | 1855 |
53 | 53 | 1721 |
54 | 54 | 1750 |
55 | 55 | 1724 |
56 | 56 | 1761 |
57 | 57 | 1668 |
58 | 58 | 1687 |
59 | 59 | 1652 |
... | ... |
1440 rows × 2 columns
times.dtypes
time object count int64 dtype: object
times['time'] = times.apply(lambda x: int(x['time']), axis=1)
times
time | count | |
---|---|---|
0 | 0 | 584 |
1 | 1 | 907 |
2 | 2 | 1074 |
3 | 3 | 1366 |
4 | 4 | 1581 |
5 | 5 | 2869 |
6 | 6 | 3137 |
7 | 7 | 3264 |
8 | 8 | 3166 |
9 | 9 | 2997 |
10 | 10 | 3184 |
11 | 11 | 3055 |
12 | 12 | 2875 |
13 | 13 | 2825 |
14 | 14 | 2706 |
15 | 15 | 2836 |
16 | 16 | 2788 |
17 | 17 | 2755 |
18 | 18 | 2663 |
19 | 19 | 2635 |
20 | 20 | 2755 |
21 | 21 | 2573 |
22 | 22 | 2579 |
23 | 23 | 2586 |
24 | 24 | 2456 |
25 | 25 | 2492 |
26 | 26 | 2458 |
27 | 27 | 2419 |
28 | 28 | 2376 |
29 | 29 | 2333 |
30 | 30 | 2477 |
31 | 31 | 2318 |
32 | 32 | 2264 |
33 | 33 | 2236 |
34 | 34 | 2130 |
35 | 35 | 2213 |
36 | 36 | 2203 |
37 | 37 | 2040 |
38 | 38 | 2047 |
39 | 39 | 2056 |
40 | 40 | 2108 |
41 | 41 | 2049 |
42 | 42 | 2021 |
43 | 43 | 1986 |
44 | 44 | 1954 |
45 | 45 | 2019 |
46 | 46 | 1969 |
47 | 47 | 1884 |
48 | 48 | 1926 |
49 | 49 | 1829 |
50 | 50 | 1901 |
51 | 51 | 1829 |
52 | 52 | 1855 |
53 | 53 | 1721 |
54 | 54 | 1750 |
55 | 55 | 1724 |
56 | 56 | 1761 |
57 | 57 | 1668 |
58 | 58 | 1687 |
59 | 59 | 1652 |
... | ... |
1440 rows × 2 columns
times.to_csv("%s/times.csv" % base, index=False)