#!/usr/bin/env python
# coding: utf-8

# # Numpy and Matplotlib #
#
# These are two of the most fundamental parts of the scientific python "ecosystem". Almost everything else is built on top of them.

# In[8]:

import numpy as np


# What did we just do? We _imported_ a package. This brings new names (mostly functions) into our interpreter. We access them as follows.

# In[5]:

# find out what is in our namespace
dir()


# In[7]:

# find out what's in numpy
dir(np)


# In[9]:

# find out what version we have
np.__version__


# The numpy documentation is crucial!
#
# http://docs.scipy.org/doc/numpy/reference/
#
# ## NDArrays ##
#
# The core class is the numpy ndarray (n-dimensional array).

# In[14]:

from IPython.display import Image
Image(url='http://docs.scipy.org/doc/numpy/_images/threefundamental.png')


# In[10]:

# create an array from a list
a = np.array([9, 0, 2, 1, 0])


# In[11]:

# find out the datatype
a.dtype


# In[12]:

# find out the shape
a.shape


# In[15]:

# what type is the shape? (it's a tuple)
type(a.shape)


# In[21]:

# another array with a different datatype and shape
b = np.array([[5, 3, 1, 9], [9, 2, 3, 0]], dtype=np.float64)


# In[22]:

# check dtype and shape
b.dtype, b.shape


# __Important Concept__: The fastest varying dimension is the last dimension! The outer level of the hierarchy is the first dimension. (This is called "C-style", or row-major, ordering.)
#
# ## More array creation ##
#
# There are lots of ways to create arrays.

# In[31]:

# create some uniform arrays
c = np.zeros((9, 9))
d = np.ones((3, 6, 3), dtype=np.complex128)
e = np.full((3, 3), np.pi)
f = np.ones_like(c)
g = np.zeros_like(d)


# In[34]:

# create some ranges
np.arange(10)


# In[38]:

# arange is left inclusive, right exclusive
np.arange(2, 4, 0.25)


# In[43]:

# linearly spaced
np.linspace(2, 4, 20)


# In[44]:

# log spaced
np.logspace(1, 2, 10)


# In[88]:

# two dimensional grids
x = np.linspace(-2*np.pi, 2*np.pi, 100)
y = np.linspace(-np.pi, np.pi, 50)
xx, yy = np.meshgrid(x, y)
xx.shape, yy.shape


# ## Indexing ##
#
# Basic indexing is similar to indexing lists.

# In[130]:

# get some individual elements of xx
xx[0, 0], xx[-1, -1], xx[3, -5]


# In[131]:

# get some whole rows and columns
xx[0].shape, xx[:, -1].shape


# In[132]:

# get some ranges
xx[3:10, 30:40].shape


# There are many advanced ways to index arrays. You can read about them in the manual. Here is one example.

# In[134]:

# use a boolean array as an index
idx = xx < 0
yy[idx].shape


# In[136]:

# boolean indexing returns a flattened array; ravel flattens explicitly
xx.ravel().shape


# ## Array Operations ##
#
# There are a huge number of operations available on arrays. All the familiar arithmetic operators are applied on an element-by-element basis.
#
# ### Basic Math ###

# In[89]:

f = np.sin(xx) * np.cos(0.5*yy)


# At this point you might be getting curious what these arrays "look" like. So we need to introduce some visualization.
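# Before plotting, here is a quick sanity check -- a minimal sketch using only the arrays already defined above -- confirming that the operation really did act element by element: each entry of `f` is built from the matching entries of `xx` and `yy`.

# In[ ]:

# shapes are preserved by element-wise operations
print(f.shape)  # (50, 100), same as xx and yy

# one entry of f matches the same formula applied to the corresponding entries of xx and yy
print(np.isclose(f[10, 20], np.sin(xx[10, 20]) * np.cos(0.5 * yy[10, 20])))  # True

# a product of a sine and a cosine stays within [-1, 1]
print(f.min(), f.max())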
# In[90]:

from matplotlib import pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[91]:

plt.pcolormesh(f)


# ## Manipulating array dimensions ##

# In[92]:

# reshape to an incompatible size (raises ValueError: f has 5000 elements, not 8*9)
# transpose
plt.pcolormesh(f.T)


# In[93]:

# reshape an array to an incompatible size (raises ValueError: f has 5000 elements, not 8*9)
g = np.reshape(f, (8, 9))


# In[96]:

# reshape to a compatible size (and scramble the picture in the process)
print(f.size)
g = np.reshape(f, (200, 25))
plt.pcolormesh(g)


# In[99]:

# tile an array
plt.pcolormesh(np.tile(f, (6, 1)))


# ## Broadcasting ##
#
# Broadcasting is an efficient way to combine arrays of different shapes in element-wise operations.

# In[191]:

Image(url='http://scipy-lectures.github.io/_images/numpy_broadcasting.png', width=720)


# In[103]:

# multiply f by x: (50, 100) * (100,) broadcasts along the last dimension
print(f.shape, x.shape)
g = f * x
print(g.shape)


# In[104]:

# multiply f by y: (50, 100) * (50,) raises an error because the last dimensions don't match
print(f.shape, y.shape)
h = f * y
print(h.shape)


# In[109]:

# use the newaxis special syntax to make y a (50, 1) column so it broadcasts correctly
h = f * y[:, np.newaxis]
print(h.shape)


# In[116]:

plt.pcolormesh(g)


# ## Reduction Operations ##

# In[140]:

# sum
g.sum()


# In[139]:

# mean
g.mean()


# In[141]:

# std
g.std()


# In[142]:

# apply on just one axis
g_ymean = g.mean(axis=0)
g_xmean = g.mean(axis=1)


# In[143]:

plt.plot(x, g_ymean)


# In[145]:

plt.plot(g_xmean, y)


# ## Fancy Plotting ##
#
# Enough lessons, let's have some fun.

# In[149]:

fig = plt.figure(figsize=(12, 8))
ax1 = plt.subplot2grid((6, 6), (0, 1), colspan=5)
ax2 = plt.subplot2grid((6, 6), (1, 0), rowspan=5)


# In[156]:

fig = plt.figure(figsize=(10, 6))
ax1 = plt.subplot2grid((6, 6), (0, 1), colspan=5)
ax2 = plt.subplot2grid((6, 6), (1, 0), rowspan=5)
ax3 = plt.subplot2grid((6, 6), (1, 1), rowspan=5, colspan=5)

ax1.plot(x, g_ymean)
ax2.plot(g_xmean, y)
ax3.pcolormesh(x, y, g)

ax1.set_xlim([x.min(), x.max()])
ax3.set_xlim([x.min(), x.max()])
ax2.set_ylim([y.min(), y.max()])
ax3.set_ylim([y.min(), y.max()])

plt.tight_layout()


# ## Real Data ##
#
# An Argo float profile from the North Atlantic.

# In[174]:

# download with curl
get_ipython().system('curl -O http://www.ldeo.columbia.edu/~rpa/argo_float_4901412.npz')


# In[175]:

# load numpy file and examine keys
data = np.load('argo_float_4901412.npz')
data.keys()


# In[176]:

# access some data
T = data['T']


# In[177]:

# there are NaNs (missing data), which spoil reductions like min (the result is just nan)
T.min()


# ## Masked Arrays ##
#
# This is how we deal with missing data in numpy.

# In[185]:

# create masked array
T = np.ma.masked_invalid(data['T'])
type(T)


# In[186]:

# max and min now ignore the masked (missing) values
T.max(), T.min()


# In[187]:

# load other data
S = np.ma.masked_invalid(data['S'])
P = np.ma.masked_invalid(data['P'])


# In[188]:

# scatter plot
plt.scatter(S, T, c=P)
plt.grid()
plt.colorbar()


# # Individual Exercise #
#
# * Calculate and plot the depth mean profile of T, S, and P with error bars indicating the standard deviation.
# * Calculate the time series of the depth-averaged profiles of T, S, and P.

# In[ ]:
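# As a starting point for the exercise, here is a minimal, hedged sketch. It assumes that T, S, and P are 2-D masked arrays with depth levels along axis 0 and time (one column per profile) along axis 1; check `data['T'].shape` and swap the `axis` argument if the orientation is reversed. Masked-array reductions automatically skip the missing values.

# In[ ]:

# time-mean and standard deviation at each depth level (assumes axis=1 is time)
T_mean, T_std = T.mean(axis=1), T.std(axis=1)
P_mean = P.mean(axis=1)

# mean temperature profile with error bars, plotted against mean pressure
plt.errorbar(T_mean, P_mean, xerr=T_std)
plt.gca().invert_yaxis()  # pressure increases downward
plt.xlabel('T')
plt.ylabel('P')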