#!/usr/bin/env python # coding: utf-8 # #
Interactive Computing with Jupyter and Python3 for GeoScientists
# ##
Introduction
# Assumptions and Expectations: # # - You have programmed in something (Fortran, Matlab, Java, C, ...) # - This is *not* a tutorial. # # During this demo, you will be exposed to: # # - *Python*, a high-level interpreted programming language. # - *Jupyter (formerly IPython) Notebook*, a web-based interactive computational environment (hint: you're looking at it) # - *Numpy*, the fundamental package for scientific computing. (array data type, libraries for linear algebra, Fourier transforms, etc.) # - *Matplotlib*, a 2D plotting library. # - *Pandas*, a data analysis library. # - *ObsPy*, an open-source project dedicated to provide a Python framework for processing seismological data. # ##
IPython Background
# What is IPython? # # - A Python kernel which interprets and runs Python code # - An interface where a user interacts with Python code # # # Those interfaces can include: # # - the command line # - the IPython notebook (browser) # - other GUI applications # - ... # ##
Getting Started
# You'll need *git* installed - this may require administrator privileges. Recent versions of Mac OSX come with git pre-installed. Type # # *which git* # # in a Terminal window to see if you have this software already. # # - Windows: http://git-scm.com/download/win # - Mac OS X: http://git-scm.com/download/mac # Next you need Python, with some additional libraries that makes it interesting for scientists. # # I recommend Anaconda from Continuum Analytics. http://continuum.io/downloads # There are other sources for "scientific" Python distributions, listed here: http://www.scipy.org/install.html # You will also need to install the following packages from a command line (OSX Terminal or Windows Command Prompt): # # - BaseMap: "conda install basemap" # - ObsPy: "pip install obspy" # - mapio: "pip install git+git://github.com/usgs/MapIOio.git" # - smtools: "pip install git+git://github.com/usgs/smtools.git" # Once Jupyter is installed, you need to start it up (from a Terminal or Command Prompt) in the directory where the IPythonDemo.ipynb file lives: # jupyter notebook # # # #### Jupyter Cells # This is a markdown *cell*. # # This is the second *line*. # # * first # * second # # Hello # # #
  • one
  • #
  • two
  • #
    # # This is a *test* # # You cause the browser to render the markdown text in this cell by hitting "Shift-Return". # In[1]: #This is a code cell #You execute all of the code in these cells by hitting "Shift-Return". foo = 5 print(foo) print('goodbye') # The following "magic" command (anything prefaced with "%" is referred to as *magic*) allows plots to be shown in-line with the code and markdown cells. # In[2]: get_ipython().run_line_magic('matplotlib', 'inline') # ##
    Plotting
    # #### Plotting with Random Data: # In[3]: #The import statement allows you to use code from other modules import numpy as np import matplotlib.pyplot as plt x = np.random.rand(1000,1) #this results in a Numpy array - more on this later y = np.random.rand(1000,1) # In[4]: fig = plt.figure(figsize=(10,6)) plt.plot(x,y,'bx') th = plt.title('Title') xl = plt.xlabel('X') yl = plt.ylabel('Y') # #### Interactive Plotting # In[5]: import mpld3 fig = plt.figure(figsize=(8,5)) plt.plot(x,y,'b.') th = plt.title('Random Points') xl = plt.xlabel('X') yl = plt.ylabel('Y') mpld3.display() # #### Plotting with Real Data or How Not To Do Loss Modeling # NB: The SciPy stack includes a package called pandas, which provides R-like data analysis and tools for reading in spreadsheet-like data (CSV, Excel, etc.) # In[6]: import pandas as pd df = pd.read_excel('EXPO_CAT_2007_12.xlsx',sheetname='Sheet1') f = plt.figure(figsize=(10,6)) ph = plt.semilogy(df['PAGER_prefMag'].get_values(),df['PAGER_prefShakingDeaths'].get_values(),'r.') th = plt.xlabel('Magnitude') th = plt.ylabel('Shaking Deaths') th = plt.title('Magnitude vs Shaking Deaths') # ...and this is why we don't model fatalities this way! # pandas reads the spreadsheet into a data structure called a *DataFrame*. You can do many things with a DataFrame, the least of which is viewing the contents in a notebook, as below. # In[7]: df # ##
    Data Arrays
    # #### Python lists vs Numpy Arrays # In[8]: x = [1,2,3,4,5] #a homogenous python list y = np.array(x) #this is a numpy array, created with a list as input #To multiply every element in a list by 5, you have to loop over the list for i in range(0,len(x)): x[i] = x[i] * 7 print(x) #To multiply every element in a numpy array by 5, just do this: y = y * 7 print(y) # #### Optimization # There are a number of packages and methods for optimizing Python code. Here is a very basic progression. # In[9]: #Looping over two lists of a million values and doing calculations on each element import datetime a = range(0,1000000) b = range(0,1000000) t1 = datetime.datetime.now() for i in range(0,len(a)): a[i]**2 + b[i]**2 + 2*a[i]*b[i] t2 = datetime.datetime.now() dt1 = ((t2-t1).microseconds/1e6) print('Looping over list: %.2e seconds' % dt1) # In[10]: #Vectorizing with numpy a = np.arange(1e6) b = np.arange(1e6) t3 = datetime.datetime.now() a**2 + b**2 + 2*a*b t4 = datetime.datetime.now() dt2 = ((t4-t3).microseconds/1e6) print('Numpy array: %.2e seconds' % dt2) print('Numpy is %.1fx faster than looping over a list' % (dt1/dt2)) # In[11]: #Running on multiple cores with numexpr import numexpr as ne t5 = datetime.datetime.now() ne.evaluate("a**2 + b**2 + 2*a*b") #Turn simple numpy expression into a string t6 = datetime.datetime.now() dt3 = ((t6-t5).microseconds/1e6) print('numexpr eval: %.2e seconds' % dt3) print('Numexpr is %.1fx faster than numpy' % (dt2/dt3)) # #### 2D Arrays (ShakeMap Example) # In[12]: from mapio.shake import ShakeGrid #ShakeGrid is a class that holds one variable of a ShakeMap grid XML file shakemap = ShakeGrid.load('grid.xml',adjust='res') eventdict = shakemap.getEventDict() fig = plt.figure(figsize=(8,8)) ax = fig.add_axes([0,0,1.0,1.0]) p = plt.imshow(shakemap.getLayer('mmi').getData(),cmap='jet',vmin=1,vmax=10) plt.xlabel('Columns') plt.ylabel('Rows') locstr = eventdict['event_description'] mag = eventdict['magnitude'] datestr = eventdict['event_timestamp'].strftime('%b %d, %Y %H:%M:%S') th = plt.title('PGA for %s - %s M%.1f' % (locstr,datestr,mag)) ch=plt.colorbar(shrink=0.7) #To save the above figure to a file, you can call the matplotlib savefig() function #NB: This function MUST be in the same cell as the code generating the figure #The formats supported by savefig() can vary, but generally speaking PNG, JPG, and PDF are well supported. plt.savefig('shakemap.png') # In[13]: #extra code cell to clean the PNG file made above - delete or don't execute this cell if you want to keep it #Any system command can be run from IPython by preceding it with "!" get_ipython().system('rm shakemap.png') # ##
    Geo-Spatial Data in Python
    # There are a couple of fundamental packages for dealing with vector (points, lines, polygons) data in Python. They are: # # - fiona A package which reads in various vector data formats (shapefiles, geojson, etc.) # - shapely A package which allows various geometric operations on shapes. # First, let's read in a shapefile of the boundaries of the US (from US Census: https://www.census.gov/geo/maps-data/data/cbf/cbf_nation.html) # In[14]: import fiona from shapely.geometry import shape, Point # In[15]: recs = fiona.open('cb_2015_us_nation_20m.shp','r') #turn a record into a shapely shape (MultiPolygon, in this case) uspoly = shape(recs[0]['geometry']) #This multipolygon has the continental US, plus lots of islands. #Let's just grab the continental US (should be the one with most vertices) npoints = [] for i in range(0,len(uspoly)): poly = uspoly[i] npoints.append(len(poly.exterior.coords)) npoints = np.array(npoints) imax = npoints.argmax() continent = uspoly[imax] recs.close() # You can look at a shape in Jupyter simply by typing the name of the variable. # In[16]: continent # Let's try looking at a list of recent (1 day) earthquakes and figuring out which ones are inside the continental US... # In[17]: import urllib.request as request import json feedurl = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/2.5_day.geojson' fh = request.urlopen(feedurl) data = fh.read().decode('utf-8') jdict = json.loads(data) insidelat = [] insidelon = [] outsidelat = [] outsidelon = [] for event in jdict['features']: lon,lat,depth = event['geometry']['coordinates'] p = Point(lon,lat) if continent.contains(p): print('Event %s is inside continental US' % event['id']) insidelat.append(lat) insidelon.append(lon) else: outsidelat.append(lat) outsidelon.append(lon) fh.close() # We can also plot the inside/outside points on top of our continent... # In[18]: cx,cy = zip(*continent.exterior.coords) f = plt.figure(figsize=(12,8)) plt.plot(cx,cy,'b') plt.plot(insidelon,insidelat,'r*') plt.plot(outsidelon,outsidelat,'gd') plt.axis([-140,-60,25,50]) lh=plt.legend(['Continental US','Earthquakes inside','Earthquakes outside'],numpoints=1) # ##
    ObsPy - Python for Seismologists
    # *ObsPy* is a Python package which provides tools to access, process, and visualize seismological data. # # Notes: # * The NEIC maintains a system called the Continuous Waveform Buffer (CWB) that holds seismometer waveform data used for earthquake detection/location. # * There is a CWB module for ObsPy, which we can use to get data from seismometers. # * Seismometer waveform data is organized into *Streams*. From the obspy website: # # >Streams are list-like objects which contain multiple Trace objects, i.e. gap-less continuous time series and related header/meta information. # #### Retrieving Data from CWB # In[19]: import warnings warnings.simplefilter("ignore", DeprecationWarning) from obspy.clients.neic import Client from obspy import UTCDateTime from obspy import read client = Client() t = UTCDateTime() - 5 * 3600 # 5 hours before now stream = client.get_waveforms("IU", "ANMO", "00", "BH?", t, t + 120) #get 2 minutes worth of data fig = plt.figure(figsize=(12,4)) ph = stream.plot() # #### Plotting Focal Mechanisms on a Map # In[20]: #First we need to write a quick function to get the last 7 or 30 days's worth of focal mechanisms. #To do this, we'll pull down the last 7 days of events from the web site, and extract the strike, #dip and rake values for those events which have had a moment tensor generated for them. import json def getRecentFM(): feed7 = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.geojson' feed30 = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_month.geojson' fh = request.urlopen(feed7) data = fh.read().decode('utf-8') fh.close() jdict = json.loads(data) focals = [] for event in jdict['features']: etypes = event['properties']['types'].strip(',').split(',') if 'moment-tensor' in etypes or 'focal-mechanism' in etypes: lon,lat,depth = event['geometry']['coordinates'] eurl = event['properties']['detail'] fh = request.urlopen(eurl) edata = fh.read().decode('utf-8') fh.close() edict = json.loads(edata) mt = edict['properties']['products']['moment-tensor'][0] if 'nodal-plane-1-strike' not in mt['properties']: continue strike = float(mt['properties']['nodal-plane-1-strike']) dip = float(mt['properties']['nodal-plane-1-dip']) if 'nodal-plane-1-rake' in mt['properties']: rake = float(mt['properties']['nodal-plane-1-rake']) else: rake = float(mt['properties']['nodal-plane-1-slip']) focals.append((lat,lon,strike,dip,rake)) return focals # In[21]: eventlist = getRecentFM() # In[22]: len(eventlist) # Set up a map with Basemap, a mapping extension to the matplotlib package - handy for replacing GMT :) # In[23]: from mpl_toolkits.basemap import Basemap from obspy.imaging.beachball import beach fig = plt.figure(figsize=(12,12)) ax = fig.add_axes([0,0,1.0,1.0]) m = Basemap(projection='cyl', lon_0=142.36929, lat_0=38.3215, resolution='c',ax=ax) m.drawcoastlines() m.fillcontinents(lake_color='#2E2EFE') m.drawparallels(np.arange(-90., 120., 30.)) m.drawmeridians(np.arange(0., 420., 60.)) m.drawmapboundary(fill_color='#2E2EFE') for event in eventlist: lat,lon = event[0:2] if lon < 0: lon += 360 #cylindrical projections need longitudes 0-360 fm = event[2:] x, y = m(lon, lat) b = beach(fm, xy=(x, y), width=10, linewidth=1, alpha=0.85,facecolor='r') b.set_zorder(10) bc=ax.add_collection(b) # #### Using ObsPy to get peak ground motions # # Below are the first few lines of a Turkish strong-motion data set. The *smtools* package provides a function for reading that data. # In[24]: get_ipython().system('head 20111023104120_4902.txt') # In[25]: import warnings warnings.filterwarnings("ignore") from smtools.turkey import readturkey tracelist = readturkey('20111023104120_4902.txt') ewtrace = tracelist[0][1] #Get the EW component of the station in this data file from Turkey net = ewtrace.stats['network'] station = ewtrace.stats['station'] location = ewtrace.stats['location'] channel = ewtrace.stats['channel'] channel_id = '%s.%s.%s.%s' % (net,station,location,channel) print(channel_id) ewtrace.plot() # In[26]: ewtrace.detrend('linear') ewtrace.detrend('demean') #ewtrace.taper(max_percentage=0.05, type='cosine') ewtrace.plot() # In[27]: ewtrace.filter('highpass',freq=0.02,zerophase=True,corners=4) pga = np.max(np.abs(ewtrace.data)) print('Peak acceleration of %s is %.2f m/s^2' % (channel_id,pga)) vtrace = ewtrace.copy() vtrace.integrate() pgv = np.max(np.abs(vtrace.data)) print('Peak velocity of %s is %.2f m/s' % (channel_id,pgv)) vtrace.plot() # #### Stopping IPython # In the Terminal window where you started IPython, hit Ctrl-C and answer 'y' when presented with this question: # # *Shutdown this notebook server (y/[n])?* # ##
    Resources
    # * This demonstration: https://github.com/mhearne-usgs/ipythondemo # * Markdown Syntax: http://daringfireball.net/projects/markdown/ # * Python Tutorial for Scientists: http://nbviewer.ipython.org/gist/rpmuller/5920182 # * IPython Tutorial: http://ipython.org/ipython-doc/2/interactive/tutorial.html # * SciPy (core numerical libraries): http://docs.scipy.org/doc/scipy/reference/ # * Matplotlib (generic 2D and 3D plotting): http://matplotlib.org/api/pyplot_summary.html and http://matplotlib.org/gallery.html # * Basemap (Geographic maps): http://matplotlib.org/basemap/api/basemap_api.html and http://matplotlib.org/basemap/users/examples.html # * Shapely (Geometric shapes and operations on them): http://toblerity.org/shapely/manual.html # * Fiona (Reading in spatial vector data): http://toblerity.org/fiona/manual.html # * ObsPy (seismological tools): http://docs.obspy.org/tutorial/ # * Pandas (data modeling/analysis tools): http://pandas.pydata.org/ # ##
    Other Possible Topics:
    # - Using nbconvert to save Notebook as PDF, HTML, Python script... # - SymPy : Symbolic math package - like Mathematica # - More on Pandas: Implementation of R-like Data Frame object # - More detailed overview of ObsPy - triggers, picks, etc. # - Scipy modules for: # - statistics # - function optimization # - interpolation # - Fourier transforms # - signal processing # - linear algebra # - machine learning (Bayesian, logistic regression, etc., etc.) # # In[28]: with plt.xkcd(): x = np.arange(0,12) y = np.random.rand(12,1) plt.plot(x,y,'b') plt.xlabel('Months') plt.ylabel('Profits') # In[ ]: