#!/usr/bin/env python
# coding: utf-8

# # Numpy and Matplotlib #
#
# These are two of the most fundamental parts of the scientific python "ecosystem". Almost everything else is built on top of them.

# In[8]:

import numpy as np


# What did we just do? We _imported_ a package. This brings new names (mostly functions) into our interpreter. We access them as follows.

# In[5]:

# find out what is in our namespace
dir()


# In[7]:

# find out what's in numpy
dir(np)


# In[9]:

# find out what version we have
np.__version__


# The numpy documentation is crucial!
#
# http://docs.scipy.org/doc/numpy/reference/
#
# ## NDArrays ##
#
# The core class is the numpy ndarray (n-dimensional array).

# In[14]:

from IPython.display import Image
Image(url='http://docs.scipy.org/doc/numpy/_images/threefundamental.png')


# In[10]:

# create an array from a list
a = np.array([9, 0, 2, 1, 0])


# In[11]:

# find out the datatype
a.dtype


# In[12]:

# find out the shape
a.shape


# In[15]:

# what type is the shape? (it's a tuple)
type(a.shape)


# In[21]:

# another array with a different datatype and shape
b = np.array([[5, 3, 1, 9], [9, 2, 3, 0]], dtype=np.float64)


# In[22]:

# check dtype and shape
b.dtype, b.shape


# __Important Concept__: The fastest varying dimension is the last dimension! The outer level of the hierarchy is the first dimension. (This is called "C-style", or row-major, ordering.)
#
# ## More array creation ##
#
# There are lots of ways to create arrays.

# In[31]:

# create some uniform arrays
c = np.zeros((9, 9))
d = np.ones((3, 6, 3), dtype=np.complex128)
e = np.full((3, 3), np.pi)
f = np.ones_like(c)
g = np.zeros_like(d)


# In[34]:

# create some ranges
np.arange(10)


# In[38]:

# arange is left inclusive, right exclusive
np.arange(2, 4, 0.25)


# In[43]:

# linearly spaced
np.linspace(2, 4, 20)


# In[44]:

# log spaced
np.logspace(1, 2, 10)


# In[88]:

# two dimensional grids
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.linspace(-np.pi, np.pi, 50)
xx, yy = np.meshgrid(x, y)
xx.shape, yy.shape


# ## Indexing ##
#
# Basic indexing is similar to indexing lists.

# In[130]:

# get some individual elements of xx
xx[0, 0], xx[-1, -1], xx[3, -5]


# In[131]:

# get some whole rows and columns
xx[0].shape, xx[:, -1].shape


# In[132]:

# get some ranges
xx[3:10, 30:40].shape


# There are many advanced ways to index arrays. You can read about them in the manual. Here is one example.

# In[134]:

# use a boolean array as an index
idx = xx < 0
yy[idx].shape


# In[136]:

# boolean indexing returns a flattened array; ravel flattens explicitly
xx.ravel().shape


# ## Array Operations ##
#
# There are a huge number of operations available on arrays. All the familiar arithmetic operators are applied on an element-by-element basis.
#
# ### Basic Math ###

# In[89]:

f = np.sin(xx) * np.cos(0.5*yy)


# At this point you might be getting curious what these arrays "look" like. So we need to introduce some visualization.
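# Before plotting, here is a quick sanity check -- a minimal sketch using only the arrays already defined above -- confirming that the operation really did act element by element: each entry of `f` is built from the matching entries of `xx` and `yy`.

# In[ ]:

# shapes are preserved by element-wise operations
print(f.shape)  # (50, 100), same as xx and yy

# one entry of f matches the same formula applied to the corresponding entries of xx and yy
print(np.isclose(f[10, 20], np.sin(xx[10, 20]) * np.cos(0.5 * yy[10, 20])))  # True

# a product of a sine and a cosine stays within [-1, 1]
print(f.min(), f.max())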
# In[90]:

from matplotlib import pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[91]:

plt.pcolormesh(f)


# ## Manipulating array dimensions ##

# In[92]:

# reshape to an incompatible size (raises ValueError: f has 5000 elements, not 8*9)
# transpose
plt.pcolormesh(f.T)


# In[93]:

# reshape an array to an incompatible size (raises ValueError: f has 5000 elements, not 8*9)
g = np.reshape(f, (8, 9))


# In[96]:

# reshape to a compatible size (and scramble the picture in the process)
print(f.size)
g = np.reshape(f, (200, 25))
plt.pcolormesh(g)


# In[99]:

# tile an array
plt.pcolormesh(np.tile(f, (6, 1)))


# ## Broadcasting ##
#
# Broadcasting is an efficient way to combine arrays of different shapes in element-wise operations.

# In[191]:

Image(url='http://scipy-lectures.github.io/_images/numpy_broadcasting.png', width=720)


# In[103]:

# multiply f by x: (50, 100) * (100,) broadcasts along the last dimension
print(f.shape, x.shape)
g = f * x
print(g.shape)


# In[104]:

# multiply f by y: (50, 100) * (50,) raises an error because the last dimensions don't match
print(f.shape, y.shape)
h = f * y
print(h.shape)


# In[109]:

# use the newaxis special syntax to make y a (50, 1) column so it broadcasts correctly
h = f * y[:, np.newaxis]
print(h.shape)


# In[116]:

plt.pcolormesh(g)


# ## Reduction Operations ##

# In[140]:

# sum
g.sum()


# In[139]:

# mean
g.mean()


# In[141]:

# std
g.std()


# In[142]:

# apply on just one axis
g_ymean = g.mean(axis=0)
g_xmean = g.mean(axis=1)


# In[143]:

plt.plot(x, g_ymean)


# In[145]:

plt.plot(g_xmean, y)


# ## Fancy Plotting ##
#
# Enough lessons, let's have some fun.

# In[149]:

fig = plt.figure(figsize=(12, 8))
ax1 = plt.subplot2grid((6, 6), (0, 1), colspan=5)
ax2 = plt.subplot2grid((6, 6), (1, 0), rowspan=5)


# In[156]:

fig = plt.figure(figsize=(10, 6))
ax1 = plt.subplot2grid((6, 6), (0, 1), colspan=5)
ax2 = plt.subplot2grid((6, 6), (1, 0), rowspan=5)
ax3 = plt.subplot2grid((6, 6), (1, 1), rowspan=5, colspan=5)

ax1.plot(x, g_ymean)
ax2.plot(g_xmean, y)
ax3.pcolormesh(x, y, g)

ax1.set_xlim([x.min(), x.max()])
ax3.set_xlim([x.min(), x.max()])
ax2.set_ylim([y.min(), y.max()])
ax3.set_ylim([y.min(), y.max()])

plt.tight_layout()


# ## Real Data ##
#
# An Argo float profile from the North Atlantic.

# In[174]:

# download with curl
get_ipython().system('curl -O http://www.ldeo.columbia.edu/~rpa/argo_float_4901412.npz')


# In[175]:

# load numpy file and examine keys
data = np.load('argo_float_4901412.npz')
data.keys()


# In[176]:

# access some data
T = data['T']


# In[177]:

# there are NaNs (missing data), which spoil reductions like min (the result is just nan)
T.min()


# ## Masked Arrays ##
#
# This is how we deal with missing data in numpy.

# In[185]:

# create masked array
T = np.ma.masked_invalid(data['T'])
type(T)


# In[186]:

# max and min now ignore the masked (missing) values
T.max(), T.min()


# In[187]:

# load other data
S = np.ma.masked_invalid(data['S'])
P = np.ma.masked_invalid(data['P'])


# In[188]:

# scatter plot
plt.scatter(S, T, c=P)
plt.grid()
plt.colorbar()


# # Individual Exercise #
#
# * Calculate and plot the depth mean profile of T, S, and P with error bars indicating the standard deviation.
# * Calculate the time series of the depth-averaged profiles of T, S, and P.

# In[ ]:
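# As a starting point for the exercise, here is a minimal, hedged sketch. It assumes that T, S, and P are 2-D masked arrays with depth levels along axis 0 and time (one column per profile) along axis 1; check `data['T'].shape` and swap the `axis` argument if the orientation is reversed. Masked-array reductions automatically skip the missing values.

# In[ ]:

# time-mean and standard deviation at each depth level (assumes axis=1 is time)
T_mean, T_std = T.mean(axis=1), T.std(axis=1)
P_mean = P.mean(axis=1)

# mean temperature profile with error bars, plotted against mean pressure
plt.errorbar(T_mean, P_mean, xerr=T_std)
plt.gca().invert_yaxis()  # pressure increases downward
plt.xlabel('T')
plt.ylabel('P')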