The idea is to replicate what we've done so far but with 3 enhancements:
We'll start with the usual prelims...
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString
import geopandas as gpd
import pandas as pd
from fiona.crs import from_epsg
%matplotlib inline
class Survey:
"""
A seismic survey.
"""
def __init__(self, params):
# Assign the variables from the parameter dict,
# using dict.items() for Python 3 compatibility.
for k, v in params.items():
setattr(self, k, v)
# These are just a convenience; we could use the
# tuples directly, or make objects with attrs.
self.xmi = self.corner[0]
self.ymi = self.corner[1]
self.x = self.size[0]
self.y = self.size[1]
self.SL = self.line_spacing[0]
self.RL = self.line_spacing[1]
self.si = self.point_spacing[0]
self.ri = self.point_spacing[1]
self.shiftx = -self.si/2.
self.shifty = -self.ri/2.
@property
def lines(self):
"""
Returns number of (src, rcvr) lines.
"""
slines = int(self.x/self.SL) + 1
rlines = int(self.y/self.RL) + 1
return slines, rlines
@property
def points_per_line(self):
"""
Returns number of (src, rcvr) points per line.
"""
spoints = int(self.y/self.si) + 2
rpoints = int(self.x/self.ri) + 2
return spoints, rpoints
@property
def src(self):
s = [Point(self.xmi+line*self.SL, self.ymi+s*self.si)
for line in range(self.lines[0])
for s in range(self.points_per_line[0])
]
S = gpd.GeoSeries(s)
S.crs = from_epsg(26911)
return S
@property
def rcvr(self):
r = [Point(self.xmi + r*self.ri + self.shiftx, self.ymi + line*self.RL - self.shifty)
for line in range(self.lines[1])
for r in range(self.points_per_line[1])
]
R = gpd.GeoSeries(r)
R.crs = from_epsg(self.epsg)
return R
@property
def layout(self):
"""
Provide a GeoDataFrame of all points,
labelled as columns and in hierarchical index.
"""
# Feels like there might be a better way to do this...
sgdf = gpd.GeoDataFrame({'geometry': self.src, 'station': 'src'})
rgdf = gpd.GeoDataFrame({'geometry': self.rcvr, 'station': 'rcvr'})
# Concatenate with a hierarchical index
layout = pd.concat([sgdf,rgdf], keys=['sources','receivers'])
layout.crs = from_epsg(self.epsg)
return layout
Perhaps s and r should be objects too. I think you might want to have survey.receivers.x for the list of x locations, for example.
params = {'corner': (5750000,4710000),
'size': (3000,1800),
'line_spacing': (600,600),
'point_spacing': (100,100),
'epsg': 26911 # http://spatialreference.org/ref/epsg/26911/
}
survey = Survey(params)
s = survey.src
r = survey.rcvr
r[:10]
0 POINT (5749950 4710050) 1 POINT (5750050 4710050) 2 POINT (5750150 4710050) 3 POINT (5750250 4710050) 4 POINT (5750350 4710050) 5 POINT (5750450 4710050) 6 POINT (5750550 4710050) 7 POINT (5750650 4710050) 8 POINT (5750750 4710050) 9 POINT (5750850 4710050) dtype: object
layout = survey.layout
layout[:10]
geometry | station | ||
---|---|---|---|
sources | 0 | POINT (5750000 4710000) | src |
1 | POINT (5750000 4710100) | src | |
2 | POINT (5750000 4710200) | src | |
3 | POINT (5750000 4710300) | src | |
4 | POINT (5750000 4710400) | src | |
5 | POINT (5750000 4710500) | src | |
6 | POINT (5750000 4710600) | src | |
7 | POINT (5750000 4710700) | src | |
8 | POINT (5750000 4710800) | src | |
9 | POINT (5750000 4710900) | src |
With a hierarchical index you can do cool things, e.g. show the last five sources:
layout.ix['sources'][-5:]
geometry | station | |
---|---|---|
115 | POINT (5753000 4711500) | src |
116 | POINT (5753000 4711600) | src |
117 | POINT (5753000 4711700) | src |
118 | POINT (5753000 4711800) | src |
119 | POINT (5753000 4711900) | src |
layout.crs
{'init': 'epsg:26911', 'no_defs': True}
ax = layout.plot()
Export GeoDataFrames to GIS shapefile.
# gdf.to_file('src_and_rcvr.shp')
We need midpoints. There is a midpoint between every source-receiver pair.
Hopefully it's not too inelegant to get to the midpoints now that we're using this layout object thing.
midpoint_list = [LineString([r, s]).interpolate(0.5, normalized=True)
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
As well as knowing the (x,y) of the midpoints, we'd also like to record the distance from each s to each live r (each r in the live patch). This is easy enough to compute:
Point(x1, y1).distance(Point(x2, y2))
Then we can make a list of all the offsets when we count the midpoints into the bins.
offsets = [r.distance(s)
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
azimuths = [(180.0/np.pi) * np.arctan((r.x - s.x)/(r.y - s.y))
for r in layout.ix['receivers'].geometry
for s in layout.ix['sources'].geometry
]
offsetx = np.array(offsets)*np.cos(np.array(azimuths)*np.pi/180.)
offsety = np.array(offsets)*np.sin(np.array(azimuths)*np.pi/180.)
Make a Geoseries of the midpoints, offsets and azimths:
midpoints = gpd.GeoDataFrame({
'geometry' : midpoint_list,
'offset' : offsets,
'azimuth': azimuths,
'offsetx' : offsetx,
'offsety' : offsety
})
midpoints[:5]
azimuth | geometry | offset | offsetx | offsety | |
---|---|---|---|---|---|
0 | -45.000000 | POINT (5749975 4710025) | 70.710678 | 50 | -50 |
1 | 45.000000 | POINT (5749975 4710075) | 70.710678 | 50 | 50 |
2 | 18.434949 | POINT (5749975 4710125) | 158.113883 | 150 | 50 |
3 | 11.309932 | POINT (5749975 4710175) | 254.950976 | 250 | 50 |
4 | 8.130102 | POINT (5749975 4710225) | 353.553391 | 350 | 50 |
ax = midpoints.plot()
Save to a shapefile if desired.
#midpt.to_file('CMPs.shp')
midpoints[:5].offsetx # Easy!
0 50 1 50 2 150 3 250 4 350 Name: offsetx, dtype: float64
midpoints.ix[3].geometry.x # Less easy :(
5749975.0
We need lists (or arrays) to pass into the matplotlib quiver plot. This takes four main parameters: x, y, u, and v, where x, y will be our coordinates, and u, v will be the offset vector for that midpoint.
We can get at the GeoDataFrame's attributes easily, but I can't see how to get at the coordinates in the geometry GeoSeries (seems like a user error — it feels like it should be really easy) so I am resorting to this:
x = [m.geometry.x for i, m in midpoints.iterrows()]
y = [m.geometry.y for i, m in midpoints.iterrows()]
fig = plt.figure(figsize=(12,8))
plt.quiver(x, y, midpoints.offsetx, midpoints.offsety, units='xy', width=0.5, scale=1/0.025, pivot='mid', headlength=0)
plt.axis('equal')
plt.show()
The bins are a new geometry, related to but separate from the survey itself, and the midpoints. We will model them as a GeoDataFrame of polygons. The steps are:
# Factor to shift the bins relative to source and receiver points
jig = survey.si / 4.
bin_centres = gpd.GeoSeries([Point(survey.xmi + 0.5*r*survey.ri + jig, survey.ymi + 0.5*s*survey.si + jig)
for r in range(2*survey.points_per_line[1] - 3)
for s in range(2*survey.points_per_line[0] - 2)
])
# Buffers are diamond shaped so we have to scale and rotate them.
scale_factor = np.sin(np.pi/4.)/2.
bin_polys = bin_centres.buffer(scale_factor*survey.ri, 1).rotate(-45)
bins = gpd.GeoDataFrame(geometry=bin_polys)
bins[:3]
geometry | |
---|---|
0 | POLYGON ((5750050 4710000, 5750000 4710000, 57... |
1 | POLYGON ((5750050.000000001 4710050, 5750000 4... |
2 | POLYGON ((5750050.000000001 4710100, 5749999.9... |
ax = bins.plot()
Thank you to Jake Wasserman and Kelsey Jordahl for this code snippet, and many pointers.
This takes about 20 seconds to run on my iMac, compared to something close to 30 minutes for the old nested loops.
reindexed = bins.reset_index().rename(columns={'index':'bins_index'})
joined = gpd.tools.sjoin(reindexed, midpoints)
bin_stats = joined.groupby('bins_index')['offset']\
.agg({'fold': len, 'min_offset': np.min})
bins = gpd.GeoDataFrame(bins.join(bin_stats))
joined[:10]
bins_index | geometry | azimuth | offset | offsetx | offsety | |
---|---|---|---|---|---|---|
0 | 0 | POLYGON ((5750050 4710000, 5750000 4710000, 57... | 45.000000 | 70.710678 | 50 | 50 |
1 | 1 | POLYGON ((5750050.000000001 4710050, 5750000 4... | -45.000000 | 70.710678 | 50 | -50 |
2 | 2 | POLYGON ((5750050.000000001 4710100, 5749999.9... | -18.434949 | 158.113883 | 150 | -50 |
3 | 3 | POLYGON ((5750050 4710150, 5750000 4710150, 57... | -11.309932 | 254.950976 | 250 | -50 |
4 | 4 | POLYGON ((5750050 4710200, 5750000 4710200, 57... | -8.130102 | 353.553391 | 350 | -50 |
5 | 5 | POLYGON ((5750050 4710250, 5750000 4710250, 57... | -6.340192 | 452.769257 | 450 | -50 |
6 | 6 | POLYGON ((5750050 4710300, 5750000 4710300, 57... | 4.398705 | 651.920241 | 650 | 50 |
7 | 6 | POLYGON ((5750050 4710300, 5750000 4710300, 57... | -5.194429 | 552.268051 | 550 | -50 |
8 | 7 | POLYGON ((5750050 4710350, 5750000 4710350, 57... | 5.194429 | 552.268051 | 550 | 50 |
9 | 7 | POLYGON ((5750050 4710350, 5750000 4710350, 57... | -4.398705 | 651.920241 | 650 | -50 |
bins[:10]
geometry | min_offset | fold | |
---|---|---|---|
0 | POLYGON ((5750050 4710000, 5750000 4710000, 57... | 70.710678 | 1 |
1 | POLYGON ((5750050.000000001 4710050, 5750000 4... | 70.710678 | 1 |
2 | POLYGON ((5750050.000000001 4710100, 5749999.9... | 158.113883 | 1 |
3 | POLYGON ((5750050 4710150, 5750000 4710150, 57... | 254.950976 | 1 |
4 | POLYGON ((5750050 4710200, 5750000 4710200, 57... | 353.553391 | 1 |
5 | POLYGON ((5750050 4710250, 5750000 4710250, 57... | 452.769257 | 1 |
6 | POLYGON ((5750050 4710300, 5750000 4710300, 57... | 552.268051 | 2 |
7 | POLYGON ((5750050 4710350, 5750000 4710350, 57... | 552.268051 | 2 |
8 | POLYGON ((5750050.000000001 4710400, 5750000 4... | 452.769257 | 2 |
9 | POLYGON ((5750050.000000001 4710450, 5749999.9... | 353.553391 | 2 |
ax = bins.plot(column="fold")
ax = bins.plot(column="min_offset")