#!/usr/bin/env python
# coding: utf-8

# # GLACINDIA Workshop
# ## Part 6: Numpy arrays
# Nikolay Koldunov
#
# koldunovn@gmail.com

# - a powerful N-dimensional array object
# - sophisticated (broadcasting) functions
# - tools for integrating C/C++ and Fortran code
# - useful linear algebra, Fourier transform, and random number capabilities
#

# NOTE: this file is a notebook export. It relies on `get_ipython()`, so it
# must be run inside IPython/Jupyter, not with the plain `python` interpreter.

# In[1]:

import numpy as np

get_ipython().run_line_magic('matplotlib', 'inline')


# In[2]:

# This is just to make the output look better.
np.set_printoptions(precision=3, suppress=True)


# ## Load data

# We are going to work with data from
# [GHCN (Global Historical Climatology Network)-Daily](http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/).
#
# A convenient way to select data from there is to use the
# [KNMI Climatological Service](http://climexp.knmi.nl/selectdailyseries.cgi?id)
#
# Load data into a variable (Delhi daily air temperatures):

# In[3]:

# The notebook used the bare automagic `ls`, which is not valid Python in a
# script; call the magic explicitly instead.
get_ipython().run_line_magic('ls', '')


# In[4]:

temp = np.loadtxt('DelhiTmax.txt')


# We load the data into a special variable called a `numpy array`. This is a
# homogeneous multidimensional array. It is a table of elements (usually
# numbers), all of the same type. Numpy arrays are basic elements of almost
# all Python-based scientific software.

# In[5]:

type(temp)


# In[6]:

temp


# The shape of the array can be viewed as the `size of the table` that
# contains the data:

# In[7]:

temp.shape


#
# However, these `tables` can have 3 or more dimensions.

# Numpy arrays use *row-major* order. Matlab and Fortran use *column-major*
# order for arrays.

# Numpy arrays are statically typed, which allows faster operations.

# In[8]:

temp.dtype


# You can't assign a value of a different type to an element of the numpy
# array:

# In[9]:

try:
    temp[0, 0] = 'Year'
except ValueError as err:
    # Expected failure: the text 'Year' cannot be converted to the numeric
    # dtype of the array. Report it instead of aborting the whole script.
    print('ValueError:', err)


# Slicing works similarly to Matlab:

# In[10]:

temp[0:5, :]


# In[11]:

# Note: the stop index -1 is exclusive, so this shows four rows and leaves
# out the very last one. Use `temp[-5:, :]` to get the last five rows.
temp[-5:-1, :]


# One can look at the data. This is done with the matplotlib module:

# In[12]:

import matplotlib.pyplot as plt

plt.plot(temp[:, 3])


# ## Index slicing

# In general it is similar to Matlab.

# First 12 elements of the **second** column (months). Remember that indexing
# starts with 0:

# In[13]:

temp[0:12, 1]


# First ten rows:

# In[14]:

temp[:10, :]


# ## Exercise
#
# - Plot only first 1000 values
# - Plot last 1000 values
#

# We can create a mask, selecting all rows where the value in the third
# column (days) equals 10:

# In[15]:

mask = (temp[:, 2] == 10)


# Here we apply this mask and show only the first 20 rows of the result:

# In[16]:

temp[mask][:20, :]


# You don't have to create a separate variable for the mask; you can apply it
# directly. Here, instead of the first rows, I show the last five rows:

# In[17]:

temp[temp[:, 2] == 10][-5:, :]


# You can combine conditions. In this case we select days from 10 to 12 (only
# the first 10 rows are shown):

# In[18]:

temp[(temp[:, 2] >= 10) & (temp[:, 2] <= 12)][0:10, :]


# ## Exercise
#
# - Select only summer months
# - Select only first half of the year
#

# ## Basic operations

# Create an example array from the first 12 values of the third column (days)
# and perform some basic operations:

# In[19]:

days = temp[0:12, 2]
days


# In[20]:

days + 10


# In[21]:

days * 20


# In[22]:

days * days


# In[23]:

np.sin(days)


# ## Exercise
#
# - Create new array that will contain only temperatures
#
# - Convert all temperatures to deg F
#
#

# ## Basic statistics

# Create *temp_values* that will contain only data values:

# In[24]:

temp_values = temp[:, 3]
temp_values


# Simple statistics:

# In[25]:

temp_values.min()


# In[26]:

temp_values.max()


# In[27]:

temp_values.mean()


# In[28]:

temp_values.std()


# In[29]:

temp_values.sum()


# You can also use the *sum* function:

# In[30]:

np.sum(temp_values)


# One can perform operations on subsets:

# ## Exercise
#
# Calculate mean for first 1000 values of temperature
#

# ## Saving data

# You can save your data as a text file:

# In[31]:

np.savetxt('temp_only_values.csv', temp[:, 3], fmt='%.4f')


# ## Exercises
#
# * Select and plot only data for October
# * Calculate monthly means for years from 1990 to 1999 and plot them

# In[ ]: