#!/usr/bin/env python # coding: utf-8 # # Seismic acquisition fiddling # The idea is to replicate what we've done so far but with 3 enhancements: # # - With a Survey object to hold the various features of a survey. # - With more GeoPandas stuff, and less fussing with (x,y)'s directly. # - Making bins and assigning midpoints to them. # We'll start with the usual prelims... # In[5]: import numpy as np import matplotlib.pyplot as plt from shapely.geometry import Point, LineString import geopandas as gpd import pandas as pd from fiona.crs import from_epsg get_ipython().run_line_magic('matplotlib', 'inline') # ## Survey object # In[6]: class Survey: """ A seismic survey. """ def __init__(self, params): # Assign the variables from the parameter dict, # using dict.items() for Python 3 compatibility. for k, v in params.items(): setattr(self, k, v) # These are just a convenience; we could use the # tuples directly, or make objects with attrs. self.xmi = self.corner[0] self.ymi = self.corner[1] self.x = self.size[0] self.y = self.size[1] self.SL = self.line_spacing[0] self.RL = self.line_spacing[1] self.si = self.point_spacing[0] self.ri = self.point_spacing[1] self.shiftx = -self.si/2. self.shifty = -self.ri/2. @property def lines(self): """ Returns number of (src, rcvr) lines. """ slines = int(self.x/self.SL) + 1 rlines = int(self.y/self.RL) + 1 return slines, rlines @property def points_per_line(self): """ Returns number of (src, rcvr) points per line. """ spoints = int(self.y/self.si) + 2 rpoints = int(self.x/self.ri) + 2 return spoints, rpoints @property def src(self): s = [Point(self.xmi+line*self.SL, self.ymi+s*self.si) for line in range(self.lines[0]) for s in range(self.points_per_line[0]) ] S = gpd.GeoSeries(s) S.crs = from_epsg(26911) return S @property def rcvr(self): r = [Point(self.xmi + r*self.ri + self.shiftx, self.ymi + line*self.RL - self.shifty) for line in range(self.lines[1]) for r in range(self.points_per_line[1]) ] R = gpd.GeoSeries(r) R.crs = from_epsg(self.epsg) return R @property def layout(self): """ Provide a GeoDataFrame of all points, labelled as columns and in hierarchical index. """ # Feels like there might be a better way to do this... sgdf = gpd.GeoDataFrame({'geometry': self.src, 'station': 'src'}) rgdf = gpd.GeoDataFrame({'geometry': self.rcvr, 'station': 'rcvr'}) # Concatenate with a hierarchical index layout = pd.concat([sgdf,rgdf], keys=['sources','receivers']) layout.crs = from_epsg(self.epsg) return layout # Perhaps s and r should be objects too. I think you might want to have survey.receivers.x for the list of x locations, for example. # ## Instantiate and plot # In[7]: params = {'corner': (5750000,4710000), 'size': (3000,1800), 'line_spacing': (600,600), 'point_spacing': (100,100), 'epsg': 26911 # http://spatialreference.org/ref/epsg/26911/ } survey = Survey(params) # In[8]: s = survey.src r = survey.rcvr r[:10] # In[9]: layout = survey.layout layout[:10] # With a hierarchical index you can do cool things, e.g. show the last five sources: # In[10]: layout.ix['sources'][-5:] # In[11]: layout.crs # In[12]: ax = layout.plot() # Export GeoDataFrames to GIS shapefile. # In[13]: # gdf.to_file('src_and_rcvr.shp') # ## Midpoint calculations # We need midpoints. There is a midpoint between every source-receiver pair. # # Hopefully it's not too inelegant to get to the midpoints now that we're using this layout object thing. # In[14]: midpoint_list = [LineString([r, s]).interpolate(0.5, normalized=True) for r in layout.ix['receivers'].geometry for s in layout.ix['sources'].geometry ] # As well as knowing the (x,y) of the midpoints, we'd also like to record the distance from each *s* to each live *r* (each *r* in the live patch). This is easy enough to compute: # # Point(x1, y1).distance(Point(x2, y2)) # # Then we can make a list of all the offsets when we count the midpoints into the bins. # In[15]: offsets = [r.distance(s) for r in layout.ix['receivers'].geometry for s in layout.ix['sources'].geometry ] # In[16]: azimuths = [(180.0/np.pi) * np.arctan((r.x - s.x)/(r.y - s.y)) for r in layout.ix['receivers'].geometry for s in layout.ix['sources'].geometry ] # In[17]: offsetx = np.array(offsets)*np.cos(np.array(azimuths)*np.pi/180.) offsety = np.array(offsets)*np.sin(np.array(azimuths)*np.pi/180.) # Make a Geoseries of the midpoints, offsets and azimths: # In[18]: midpoints = gpd.GeoDataFrame({ 'geometry' : midpoint_list, 'offset' : offsets, 'azimuth': azimuths, 'offsetx' : offsetx, 'offsety' : offsety }) midpoints[:5] # In[19]: ax = midpoints.plot() # Save to a shapefile if desired. # In[20]: #midpt.to_file('CMPs.shp') # ## Spider plot # In[21]: midpoints[:5].offsetx # Easy! # In[22]: midpoints.ix[3].geometry.x # Less easy :( # We need lists (or arrays) to pass into the [matplotlib quiver plot](http://matplotlib.org/examples/pylab_examples/quiver_demo.html). This takes four main parameters: *x, y, u,* and *v*, where *x, y* will be our coordinates, and *u, v* will be the offset vector for that midpoint. # # We can get at the GeoDataFrame's attributes easily, but I can't see how to get at the coordinates in the geometry GeoSeries (seems like a user error — it feels like it should be really easy) so I am resorting to this: # In[23]: x = [m.geometry.x for i, m in midpoints.iterrows()] y = [m.geometry.y for i, m in midpoints.iterrows()] # In[24]: fig = plt.figure(figsize=(12,8)) plt.quiver(x, y, midpoints.offsetx, midpoints.offsety, units='xy', width=0.5, scale=1/0.025, pivot='mid', headlength=0) plt.axis('equal') plt.show() # ## Bins # The bins are a new geometry, related to but separate from the survey itself, and the midpoints. We will model them as a GeoDataFrame of polygons. The steps are: # # 1. Compute the bin centre locations with our usual list comprehension trick. # 1. Buffer the centres with a square. # 1. Gather the buffered polygons into a GeoDataFrame. # In[30]: # Factor to shift the bins relative to source and receiver points jig = survey.si / 4. bin_centres = gpd.GeoSeries([Point(survey.xmi + 0.5*r*survey.ri + jig, survey.ymi + 0.5*s*survey.si + jig) for r in range(2*survey.points_per_line[1] - 3) for s in range(2*survey.points_per_line[0] - 2) ]) # Buffers are diamond shaped so we have to scale and rotate them. scale_factor = np.sin(np.pi/4.)/2. bin_polys = bin_centres.buffer(scale_factor*survey.ri, 1).rotate(-45) bins = gpd.GeoDataFrame(geometry=bin_polys) bins[:3] # In[31]: ax = bins.plot() # ## New spatial join # Thank you to Jake Wasserman and Kelsey Jordahl for this code snippet, and many pointers. # # This takes about 20 seconds to run on my iMac, compared to something close to 30 minutes for the old nested loops. # In[32]: reindexed = bins.reset_index().rename(columns={'index':'bins_index'}) joined = gpd.tools.sjoin(reindexed, midpoints) bin_stats = joined.groupby('bins_index')['offset']\ .agg({'fold': len, 'min_offset': np.min}) bins = gpd.GeoDataFrame(bins.join(bin_stats)) # In[33]: joined[:10] # In[34]: bins[:10] # In[35]: ax = bins.plot(column="fold") # In[36]: ax = bins.plot(column="min_offset") # In[ ]: