# --- Imports ---
import sys, os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
Let's pull in all SF crime data provided by SF OpenData (retrieved 2/28/15) and peek at the schema.
d_crime=pd.read_csv(os.getcwd()+'/Map__Crime_Incidents_-_from_1_Jan_2003.csv')
print d_crime.shape
d_crime.head(1)
(1723961, 12)
| | IncidntNum | Category | Descript | DayOfWeek | Date | Time | PdDistrict | Resolution | Address | X | Y | Location |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 50436712 | ASSAULT | BATTERY | Wednesday | 04/20/2005 12:00:00 AM | 04:00 | MISSION | NONE | 18TH ST / CASTRO ST | -122.435003 | 37.760888 | (37.7608878061245, -122.435002864271) |
It provides ~1.7M records across 12 fields, including the crime category and description, date and time, and police district (PdDistrict).
Here's a nice map of the districts: http://sf-police.org/index.aspx?page=796
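As a quick sanity check (a sketch using the columns from the schema above), we can list the districts that the map refers to:
# Peek at the police districts referenced by the map above
print(sorted(d_crime['PdDistrict'].dropna().unique()))
print(d_crime['PdDistrict'].value_counts())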
Let's create an easy handle (days) for time-series analysis.
date=pd.to_datetime(d_crime['Date'])
print date.min()
print date.max()
t_delta=(date-date.min()).astype('timedelta64[D]')
d_crime['days']=t_delta
d_crime.head(1)
2003-01-01 00:00:00
2015-02-13 00:00:00
| | IncidntNum | Category | Descript | DayOfWeek | Date | Time | PdDistrict | Resolution | Address | X | Y | Location | days |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 50436712 | ASSAULT | BATTERY | Wednesday | 04/20/2005 12:00:00 AM | 04:00 | MISSION | NONE | 18TH ST / CASTRO ST | -122.435003 | 37.760888 | (37.7608878061245, -122.435002864271) | 840 |
The first recorded incident is 2003-01-01 and the most recent is 2015-02-13. Nice.
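As a quick sanity check on the new handle (a sketch using the variables above; 2003-01-01 to 2015-02-13 is 4,426 days):
# The days column should span the full date range
print(t_delta.max())                   # expect 4426.0
print((date.max()-date.min()).days)    # same span, straight from the timestamps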
def plotdat(data,cat):
    ''' Bar plot of report counts grouped by a column '''
    l=data.groupby(cat).size()
    l.sort()  # in-place ascending sort (old pandas API)
    fig=plt.figure(figsize=(10,5))
    plt.yticks(fontsize=8)
    l.plot(kind='bar',fontsize=12,color='k')
    plt.xlabel('')
    plt.ylabel('Number of reports',fontsize=10)
plotdat(d_crime,'Category')
Let's get a more detailed view by examining Descript, which gives the particular crime type.
l=d_crime.groupby('Descript').size()
l.sort()
print l.shape
(912,)
Since there are 912 different crime types, let's slice by percentile and peek at the top types of crime for each PdDistrict.
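The slicing idea: total the reports per crime type, then keep only the types above a chosen percentile of that distribution. A toy sketch of the same np.percentile filtering, with made-up counts:
# Keep only values above the 75th percentile of a toy distribution
counts=pd.Series([5,10,20,400,900],index=['a','b','c','d','e'])
p=np.percentile(counts,75)   # p == 400.0 with linear interpolation
print(counts[counts>p])      # only 'e' survives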
def types_districts(d_crime,per):
    ''' Counts per crime type and district, keeping only types above a percentile '''
    # Group by crime type and district
    hoods_per_type=d_crime.groupby('Descript').PdDistrict.value_counts(sort=True)
    t=hoods_per_type.unstack().fillna(0)
    # Order districts by their total report count
    hood_sum=t.sum(axis=0)
    hood_sum.sort(ascending=False)  # in-place (old pandas API)
    t=t[hood_sum.index]
    # Total reports per crime type
    crime_sum=t.sum(axis=1)
    crime_sum.sort()
    # Large number of types, so slice the data by percentile
    p=np.percentile(crime_sum,per)
    ix=crime_sum[crime_sum>p]
    t=t.loc[ix.index]
    return t
t=types_districts(d_crime,96)
Cluster the non-normalized data across the top-percentile crime types and each PdDistrict.
sns.clustermap(t)
Normalize vertically across PdDistrict.
sns.clustermap(t,standard_scale=1)
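As I understand seaborn's standard_scale, each column (for standard_scale=1) has its minimum subtracted and is then divided by its maximum, putting every district on a comparable 0-1 scale. A rough manual equivalent (a sketch, per the seaborn docs):
# Manual equivalent of standard_scale=1: per-column min-subtract, then divide by max
t_shift=t-t.min(axis=0)
t_scaled=t_shift.div(t_shift.max(axis=0),axis=1)
sns.clustermap(t_scaled)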
Normalize horizontally across crime types.
sns.clustermap(t,standard_scale=0)
(1) GTA (grand theft from locked autos) is the most common crime in most PdDistricts.
(2) The normalized views show how the mix of crime types varies across districts.
Let's re-examine the crime types.
plotdat(d_crime,'Category')
I'm interested in LARCENY/THEFT:
# Let's drill down onto one
cat=d_crime[d_crime['Category']=='LARCENY/THEFT']
c=cat['Descript'].value_counts()
c.sort(ascending=False)
c.head(10)
GRAND THEFT FROM LOCKED AUTO    116246
PETTY THEFT FROM LOCKED AUTO     38641
PETTY THEFT OF PROPERTY          31206
GRAND THEFT OF PROPERTY          21911
PETTY THEFT FROM A BUILDING      19756
GRAND THEFT FROM A BUILDING      18879
PETTY THEFT SHOPLIFTING          18624
GRAND THEFT FROM PERSON          13696
GRAND THEFT PICKPOCKET           11102
PETTY THEFT WITH PRIOR            8190
dtype: int64
We can reuse types_districts from above; we simply slice the input data to a single category first.
t=types_districts(cat,70)
sns.clustermap(t)
sns.clustermap(t,standard_scale=1)
sns.clustermap(t,standard_scale=0)
Hm. SOUTHERN stands out.
from IPython.display import Image
i = Image(filename=os.getcwd()+'/SF_police.png')
i
Maybe we can compress this a bit by binning the data into 30-day windows.
# Let's drill down into one category
cat=d_crime[d_crime['Category']=='LARCENY/THEFT'].copy()  # copy to avoid SettingWithCopyWarning
# Bin crime by 30 day window
cat['Month']=np.floor(cat['days']/30) # Approximate month (30 day window)
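A quick check on the binning arithmetic (a sketch; exact values depend on the data): day 840 falls in window floor(840/30) = 28, and with ~4,426 days of data the last window should be around 147.
print(cat['Month'].max())       # expect ~147.0
print(cat['Month'].nunique())   # number of populated 30-day windows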
# Default
district='All'
def timeseries(dat,per):
    ''' Crime types grouped by 30-day window '''
    # Group by month window and crime type
    cat_per_time=dat.groupby('Month').Descript.value_counts(sort=True)
    t=cat_per_time.unstack().fillna(0)
    # Total reports per crime type
    crime_sum=t.sum(axis=0)
    crime_sum.sort()  # in-place (old pandas API)
    # Large number of types, so slice the data by percentile
    p=np.percentile(crime_sum,per)
    ix=crime_sum[crime_sum>p]
    t=t[ix.index]
    return t
t_all=timeseries(cat,0)
c=t_all.sum(axis=0)
c.sort(ascending=False)
c.head(5)
GRAND THEFT FROM LOCKED AUTO    116246
PETTY THEFT FROM LOCKED AUTO     38641
PETTY THEFT OF PROPERTY          31206
GRAND THEFT OF PROPERTY          21911
PETTY THEFT FROM A BUILDING      19756
dtype: float64
c.index
Index([u'GRAND THEFT FROM LOCKED AUTO', u'PETTY THEFT FROM LOCKED AUTO', u'PETTY THEFT OF PROPERTY', u'GRAND THEFT OF PROPERTY', u'PETTY THEFT FROM A BUILDING', u'GRAND THEFT FROM A BUILDING', u'PETTY THEFT SHOPLIFTING', u'GRAND THEFT FROM PERSON', u'GRAND THEFT PICKPOCKET', u'PETTY THEFT WITH PRIOR', u'GRAND THEFT FROM UNLOCKED AUTO', u'PETTY THEFT FROM UNLOCKED AUTO', u'GRAND THEFT BICYCLE', u'ATTEMPTED THEFT FROM LOCKED VEHICLE', u'GRAND THEFT SHOPLIFTING', u'PETTY THEFT BICYCLE', u'LOST PROPERTY, PETTY THEFT', u'GRAND THEFT PURSESNATCH', u'LOST PROPERTY, GRAND THEFT', u'THEFT OF COMPUTERS OR CELL PHONES', u'GRAND THEFT AUTO STRIP', u'PETTY THEFT AUTO STRIP', u'THEFT FROM MERCHANT OR LIBRARY', u'THEFT OF CHECKS OR CREDIT CARDS', u'ATTEMPTED SHOPLIFTING', u'ATTEMPTED THEFT FROM A BUILDING', u'ATTEMPTED PETTY THEFT OF PROPERTY', u'EMBEZZLEMENT FROM DEPENDENT OR ELDER ADULT BY CARETAKER', u'LICENSE PLATE OR TAB, THEFT OF', u'ATTEMPTED THEFT FROM UNLOCKED VEHICLE', u'ATTEMPTED GRAND THEFT FROM PERSON', u'ATTEMPTED THEFT OF A BICYCLE', u'PETTY THEFT COIN OPERATED MACHINE', u'ATTEMPTED AUTO STRIP', u'THEFT OF ANIMALS (GENERAL)', u'GRAND THEFT BY PROSTITUTE', u'GRAND THEFT MOTORCYCLE STRIP', u'THEFT, GRAND, OF FIREARM', u'GRAND THEFT COIN OPERATED MACHINE', u'THEFT OF UTILITY SERVICES', u'ATTEMPTED GRAND THEFT PURSESNATCH', u'ATTEMPTED GRAND THEFT PICKPOCKET', u'THEFT, BICYCLE, <$50, NO SERIAL NUMBER', u'PETTY THEFT MOTORCYCLE STRIP', u'ATTEMPTED THEFT COIN OPERATED MACHINE', u'PETTY THEFT PHONE BOOTH', u'THEFT, BICYCLE, <$50, SERIAL NUMBER KNOWN', u'THEFT, GRAND, BY FIDUCIARY, >$400 IN 12 MONTHS', u'THEFT, DRUNK ROLL, $200-$400', u'THEFT OF TELECOMMUNICATION SERVICES, INCL. CLONE PHONE', u'THEFT, DRUNK ROLL, >$400', u'THEFT, DRUNK ROLL, $50-$200', u'THEFT, DRUNK ROLL, <$50', u'GRAND THEFT PHONE BOOTH', u'THEFT, BOAT', u'TRADE SECRETS, THEFT OR UNAUTHORIZED COPYING', u'THEFT OF WRITTEN INSTRUMENT', u'ATTEMPTED MOTORCYCLE STRIP', u'THEFT, GRAND, AGRICULTURAL', u'ATTEMPTED THEFT PHONE BOOTH'], dtype='object')
Let's group the theft categories to make this easier to examine (a dict-based alternative is sketched after the lists).
auto_grand=['GRAND THEFT FROM LOCKED AUTO',
'GRAND THEFT FROM UNLOCKED AUTO',
'ATTEMPTED THEFT FROM LOCKED VEHICLE',
'ATTEMPTED THEFT FROM UNLOCKED VEHICLE',
'GRAND THEFT AUTO STRIP',
'ATTEMPTED AUTO STRIP',
'GRAND THEFT MOTORCYCLE STRIP']
auto_petty=['PETTY THEFT FROM LOCKED AUTO',
'PETTY THEFT FROM UNLOCKED AUTO',
'PETTY THEFT AUTO STRIP']
auto=auto_petty+auto_grand
bike=['GRAND THEFT BICYCLE',
'PETTY THEFT BICYCLE',
'ATTEMPTED THEFT OF A BICYCLE',
'THEFT, BICYCLE, <$50, NO SERIAL NUMBER',
'THEFT, BICYCLE, <$50, SERIAL NUMBER KNOWN']
building=['GRAND THEFT FROM A BUILDING',
'PETTY THEFT FROM A BUILDING',
'ATTEMPTED THEFT FROM A BUILDING']
shoplift=['PETTY THEFT SHOPLIFTING',
'GRAND THEFT SHOPLIFTING',
'ATTEMPTED SHOPLIFTING']
prop=['GRAND THEFT OF PROPERTY',
'ATTEMPTED PETTY THEFT OF PROPERTY']
person=['GRAND THEFT FROM PERSON',
    'GRAND THEFT PICKPOCKET',
    'ATTEMPTED GRAND THEFT FROM PERSON',
    'GRAND THEFT PURSESNATCH',
    'ATTEMPTED GRAND THEFT PURSESNATCH',
    'ATTEMPTED GRAND THEFT PICKPOCKET']
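For what it's worth, the same grouping can be written as a reverse lookup table, which is easier to extend than six separate column sums; a sketch assuming the lists above (axis=1 groupby is fine on the pandas of this era):
# Build a Descript -> group lookup from the lists above, then aggregate in one pass
group_map={}
for name,features in [('AUTO',auto),('BIKE',bike),('BUILDING',building),
                      ('SHOPLIFT',shoplift),('PROPERTY',prop),('PERSON',person)]:
    for f in features:
        group_map[f]=name
grouped=t_all.groupby(group_map,axis=1).sum()  # columns missing from the map are dropped
print(grouped.head())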
# Let's use real dates for plotting
days_from_start=pd.Series(t_all.index*30).astype('timedelta64[D]')
dates_for_plot=date.min()+days_from_start
time_labels=dates_for_plot.map(lambda x: str(x.year)+'-'+str(x.month))
def drug_analysis(t,district,plot):
    ''' Sum each theft group and (optionally) plot a stacked monthly bar chart '''
    t['AUTO']=t[auto].sum(axis=1)
    t['BIKE']=t[bike].sum(axis=1)
    t['BUILDING']=t[building].sum(axis=1)
    t['SHOPLIFT']=t[shoplift].sum(axis=1)
    t['PROPERTY']=t[prop].sum(axis=1)
    t['PERSON']=t[person].sum(axis=1)
    theft=t[['AUTO','BIKE','BUILDING','SHOPLIFT','PROPERTY','PERSON']]
    if plot:
        theft.index=[int(i) for i in theft.index]
        colors = plt.cm.jet(np.linspace(0, 1, theft.shape[1]))
        theft.plot(kind='bar', stacked=True, figsize=(20,5), color=colors, width=1, title=district, fontsize=6)
    return theft
theft_df_all=drug_analysis(t_all,district,True)
def drug_analysis_rescale(t,district,plot):
    ''' Same as drug_analysis, but each window rescaled to fractions summing to 1 '''
    t['AUTO']=t[auto].sum(axis=1)
    t['BIKE']=t[bike].sum(axis=1)
    t['BUILDING']=t[building].sum(axis=1)
    t['SHOPLIFT']=t[shoplift].sum(axis=1)
    t['PROPERTY']=t[prop].sum(axis=1)
    t['PERSON']=t[person].sum(axis=1)
    theft=t[['AUTO','BIKE','BUILDING','SHOPLIFT','PROPERTY','PERSON']]
    if plot:
        theft=theft.div(theft.sum(axis=1),axis=0)
        theft.index=[int(i) for i in theft.index]
        colors = plt.cm.jet(np.linspace(0, 1, theft.shape[1]))
        theft.plot(kind='bar', stacked=True, figsize=(20,5), color=colors, width=1, title=district, legend=False)
        plt.ylim([0,1])
    return theft
theft_df_all_rescale=drug_analysis_rescale(t_all,district,True)
Let's add the real dates.
dates_for_plot.index=dates_for_plot
sns.set_context(rc={"figure.figsize": (12.5,5.5)})
for d,c in zip(['AUTO','BIKE','BUILDING','PERSON'],['b','r','c','g']):
    plt.plot(dates_for_plot.index,theft_df_all[d],'o-',color=c,ms=6,mew=1.5,mec='white',linewidth=0.5,label=d,alpha=0.75)
plt.legend(loc='upper left',scatterpoints=1,prop={'size':8})
Let's iterate through each district.
stor=[]
stor_time=[]
for d in set(d_crime['PdDistrict']):
    # Specify district and group by time
    dist_dat=cat[cat['PdDistrict']==d]
    t=timeseries(dist_dat,0)
    # Merge to ensure all categories are preserved!
    t_merge=pd.DataFrame(columns=t_all.columns)
    m=pd.concat([t_merge,t],axis=0).fillna(0)
    m.reset_index(inplace=True)
    # Plot
    drug_df=drug_analysis(m,d,True)
    plt.show()
    s=drug_df.sum(axis=0)
    stor=stor+[s]
    # Suffix columns with the district name for later concatenation
    drug_df.columns=[c+"_%s"%d for c in drug_df.columns]
    stor_time=stor_time+[drug_df]
theft_dat_time=pd.concat(stor_time,axis=1)
theft_dat=pd.concat(stor,axis=1)
theft_dat.columns=list(set(d_crime['PdDistrict']))
We can also look at correlations between areas for the different theft types.
sns.set_context(rc={"figure.figsize": (20,20)})
sns.corrplot(theft_dat_time, annot=False, sig_stars=False,diag_names=False)
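To read a single cell of that matrix, we can compute one pairwise Pearson correlation directly (a sketch; the column names follow the district-suffix convention used above). Note that corrplot was removed in later seaborn versions; sns.heatmap(theft_dat_time.corr()) is the usual replacement.
# One pairwise Pearson correlation from the same matrix
print(theft_dat_time['AUTO_SOUTHERN'].corr(theft_dat_time['AUTO_CENTRAL']))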
With this in mind, we can examine select timeseries data.
theft_dat_time.index=dates_for_plot
sns.set_context(rc={"figure.figsize": (7.5,5)})
for d,c in zip(['AUTO_SOUTHERN','AUTO_CENTRAL','AUTO_NORTHERN'],['b','r','c']):
    plt.plot(theft_dat_time.index,theft_dat_time[d],'o-',color=c,ms=6,mew=1,mec='white',linewidth=0.5,label=d,alpha=0.75)
plt.legend(loc='upper left',scatterpoints=1,prop={'size':8})
sns.set_context(rc={"figure.figsize": (7.5,5)})
for d,c in zip(['PROPERTY_NORTHERN','PROPERTY_CENTRAL'],['b','r']):
    plt.plot(theft_dat_time.index,theft_dat_time[d],'o-',color=c,ms=6,mew=1,mec='white',linewidth=0.5,label=d,alpha=0.75)
plt.legend(loc='upper left',scatterpoints=1,prop={'size':8})
Let's redo what we did above, but rescaled.
stor=[]
stor_time=[]
for d in set(d_crime['PdDistrict']):
    # Specify district and group by time
    dist_dat=cat[cat['PdDistrict']==d]
    t=timeseries(dist_dat,0)
    # Merge to ensure all categories are preserved!
    t_merge=pd.DataFrame(columns=t_all.columns)
    m=pd.concat([t_merge,t],axis=0).fillna(0)
    m.reset_index(inplace=True)
    # Plot
    drug_df=drug_analysis_rescale(m,d,True)
    plt.show()
    s=drug_df.sum(axis=0)
    stor=stor+[s]
    # Suffix columns with the district name for later concatenation
    drug_df.columns=[c+"_%s"%d for c in drug_df.columns]
    stor_time=stor_time+[drug_df]
crime_dat_time_rescale=pd.concat(stor_time,axis=1)
crime_dat_rescale=pd.concat(stor,axis=1)
crime_dat_rescale.columns=list(set(d_crime['PdDistrict']))
We can now summarize this data using clustered heatmaps.
sns.clustermap(theft_dat,standard_scale=0)
sns.clustermap(theft_dat,standard_scale=1)
Let's isolate all crack-related records. (This reuses the drug side of the notebook: crack_features is assumed to have been defined earlier as the list of crack-related Descript values, with cat re-sliced to that category and its Month column computed as above.)
tmp=cat.copy()
tmp.set_index('Descript',inplace=True)
# 'crack_features': list of crack-related Descript values, assumed defined earlier
crack_dat=tmp.loc[crack_features]
crack_pts=crack_dat[['X','Y','Month']]
Plot the crack regimes.
d=pd.DataFrame(crack_pts.groupby('Month').size())
d.index=dates_for_plot
d.columns=['Count']
diff=len(d.index)-120
plt.plot(d.index,d['Count'],'o-',color='k',ms=6,mew=1,mec='white',linewidth=0.5,label='Count',alpha=0.75)
plt.axvspan(d.index[40-diff],d.index[40],color='cyan',alpha=0.5)
plt.axvspan(d.index[80-diff],d.index[80],color='red',alpha=0.5)
plt.axvspan(d.index[120],d.index[-1],color='green',alpha=0.5)
oldest_crack_sums=d.loc[(d.index>d.index[40-diff]) & (d.index<d.index[40])]
old_crack_sums=d.loc[(d.index>d.index[80-diff]) & (d.index<d.index[80])]
new_crack_sums=d.loc[d.index>d.index[120]]
Fold-difference in mean counts between the old and new regimes:
old_crack_sums['Count'].mean()/float(new_crack_sums['Count'].mean())
4.6624129930394425
Now pull the points in each regime.
oldest_crack=crack_pts[(crack_pts['Month']>(40-diff)) & (crack_pts['Month']<40)]
oldest_crack.columns=['longitude','latitude','time']
old_crack=crack_pts[(crack_pts['Month']>(80-diff)) & (crack_pts['Month']<80)]
old_crack.columns=['longitude','latitude','time']
new_crack=crack_pts[crack_pts['Month']>120]
new_crack.columns=['longitude','latitude','time']
We can look at this spatially, using a shapefile of SF neighborhoods to overlay the data onto a map:
https://data.sfgov.org/Geographic-Locations-and-Boundaries/Neighborhoods/ejmn-jyk6
Basemap can be used to view this. Some nice work at this link that I drew from:
http://sensitivecities.com/so-youd-like-to-make-a-map-using-python-EN.html
from mpl_toolkits.basemap import Basemap
import fiona
from shapely.geometry import shape, mapping
from pyproj import Proj, transform
from fiona.crs import from_epsg
from itertools import chain
shapefile="/Users/lmartin/Desktop/Misc/Blog_Archive/GIS/sffind_neighborhoods/sffind_neighborhoods.shp"
shp = fiona.open(shapefile)
bds = shp.bounds
print (bds)
shp.close()
extra = 0.01
ll = (bds[0], bds[1])
ur = (bds[2], bds[3])
coords = list(chain(ll, ur))
w, h = coords[2] - coords[0], coords[3] - coords[1]
(-122.5148972319999, 37.708089209000036, -122.35698198799994, 37.83239597600004)
Now we can set up the Basemap instance.
m = Basemap(
projection='tmerc',
lon_0=-122.,
lat_0=37.7,
ellps = 'WGS84',
llcrnrlon=coords[0] - extra * w,
llcrnrlat=coords[1] - extra + 0.01 * h,
urcrnrlon=coords[2] + extra * w,
urcrnrlat=coords[3] + extra + 0.01 * h,
lat_ts=0,
resolution='i',
suppress_ticks=True)
m.readshapefile(
'/Users/lmartin/Desktop/Misc/Blog_Archive/GIS/sffind_neighborhoods/sffind_neighborhoods',
'SF',
color='none',
zorder=2)
from descartes import PolygonPatch
from shapely.geometry import Point, Polygon, MultiPoint, MultiPolygon
from shapely.prepared import prep
from matplotlib.collections import PatchCollection
# Set up a map dataframe
df_map = pd.DataFrame({
'poly': [Polygon(xy) for xy in m.SF],
'ward_name': [ward['name'] for ward in m.SF_info]})
df_map['area_m'] = df_map['poly'].map(lambda x: x.area)  # area in projected m^2
df_map['area_km'] = df_map['area_m'] / 1.0e6  # m^2 -> km^2
def makePoints(dat):
    ''' Create Point objects in map coordinates from dataframe lon/lat values '''
    map_points = pd.Series([Point(m(mapped_x,mapped_y)) for mapped_x, mapped_y in zip(dat['longitude'],dat['latitude'])])
    plt_points = MultiPoint(list(map_points.values))
    hoods_polygon = prep(MultiPolygon(list(df_map['poly'].values)))
    # Keep only points that fall inside a neighborhood polygon
    pts = filter(hoods_polygon.contains,plt_points)
    return pts
sf_oldest_crack_points_todraw=makePoints(oldest_crack)
sf_old_crack_points_todraw=makePoints(old_crack)
sf_new_crack_points_todraw=makePoints(new_crack)
# Draw neighborhoods with polygons
df_map['patches'] = df_map['poly'].map(lambda x: PolygonPatch(
    x,
    fc='#000000',
    ec='#ffffff', lw=.5, alpha=1,
    zorder=4))
plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111, axisbg='w', frame_on=False)
# Now, we can overlay our points
dev = m.scatter(
    [geom.x for geom in sf_oldest_crack_points_todraw],
    [geom.y for geom in sf_oldest_crack_points_todraw],
    10, marker='o', lw=.25,
    facecolor='cyan', edgecolor='cyan',
    alpha=0.75, antialiased=True,
    label='Oldest crack', zorder=3)
dev = m.scatter(
    [geom.x for geom in sf_old_crack_points_todraw],
    [geom.y for geom in sf_old_crack_points_todraw],
    10, marker='o', lw=.25,
    facecolor='red', edgecolor='red',
    alpha=0.75, antialiased=True,
    label='Old crack', zorder=3)
dev = m.scatter(
    [geom.x for geom in sf_new_crack_points_todraw],
    [geom.y for geom in sf_new_crack_points_todraw],
    10, marker='o', lw=.25,
    facecolor='green', edgecolor='green',
    alpha=0.75, antialiased=True,
    label='New crack', zorder=3)
ax.add_collection(PatchCollection(df_map['patches'].values, match_original=True))
plt.tight_layout()
fig.set_size_inches(15,15)