#!/usr/bin/env python
# coding: utf-8

# # NumPy arrays
# Nikolay Koldunov
#
# koldunovn@gmail.com
# ================
#
# - a powerful N-dimensional array object
# - sophisticated (broadcasting) functions
# - tools for integrating C/C++ and Fortran code
# - useful linear algebra, Fourier transform, and random number capabilities
#
# NOTE: this script is a notebook export; it relies on IPython
# (`get_ipython`) and is meant to be run cell by cell or with `ipython`.

# In[1]:

import numpy as np
get_ipython().run_line_magic('matplotlib', 'inline')

# In[2]:

np.set_printoptions(precision=3, suppress=True)  # this is just to make the output look better

# ## Load data

# Load data into a variable:

# In[3]:

temp = np.loadtxt('Ham_3column.txt')

# In[4]:

temp

# In[5]:

temp.shape

#

# In[6]:

temp.size

# So it's *row-major* order. Matlab and Fortran use *column-major* order for arrays.

# In[7]:

type(temp)

# NumPy arrays are statically typed, which allows faster operations

# In[8]:

temp.dtype

# You can't assign a value of a different type to an element of the numpy array:

# In[9]:

# The assignment below fails because 'Year' cannot be converted to the
# array's dtype. The error is caught and printed so that the demonstration
# does not abort the rest of the script.
try:
    temp[0, 0] = 'Year'
except ValueError as err:
    print('ValueError:', err)

# Slicing works similarly to Matlab:

# In[10]:

temp[0:5, :]

# In[11]:

# Note: the stop index is exclusive, so this selects four rows and leaves
# out the very last one; use temp[-5:, :] to include it.
temp[-5:-1, :]

# One can look at the data. This is done with the matplotlib module:

# In[12]:

# pyplot is the recommended interface; it provides the same `plot` function
# as the discouraged `matplotlib.pylab` namespace.
import matplotlib.pyplot as plt
plt.plot(temp[:, 3])

# ## Index slicing

# In general it is similar to Matlab

# First 12 elements of the column with index 2, i.e. the **third** column. Remember that indexing starts with 0:
# NOTE(review): the original text called this the second column (months),
# while later cells treat index 2 as days - confirm against the data file.

# In[13]:

temp[0:12, 2]

# First row:

# In[14]:

temp[0, :]

# ## Exercise
#
# - Plot only first 1000 values
# - Plot last 1000 values
#

# In[ ]:


# In[ ]:


# In[ ]:


# We can create a mask selecting all rows where the value in the third column (days) equals 10:

# In[15]:

mask = (temp[:, 2] == 10)

# Here we apply this mask and show only the first 20 rows of the result:

# In[16]:

temp[mask][:20, :]

# You don't have to create a separate variable for the mask; you can apply it directly. Here, instead of the first rows, I show the last five rows:

# In[17]:

temp[temp[:, 2] == 10][-5:, :]

# You can combine conditions. In this case we select days from 10 to 12 (only first 10 elements are shown):

# In[18]:

temp[(temp[:, 2] >= 10) & (temp[:, 2] <= 12)][0:10, :]

# ## Exercise
#
# Select only summer months
# Select only first half of the year
#

# In[ ]:


# ## Basic operations

# Create an example array from the first 12 values of the column with index 2 (days) and perform some basic operations:

# In[19]:

days = temp[0:12, 2]
days

# In[20]:

days + 10

# In[21]:

days * 20

# In[22]:

days * days

# What's wrong with this figure?

# In[23]:

plt.plot(temp[:100, 3])

# ## Exercise
#
# - Create new array that will contain only temperatures
#
# - Convert temperature to deg C
#
# - Convert all temperatures to deg F
#

# In[ ]:


# ## Basic statistics

# Create *temp_values* that will contain only data values:

# In[24]:

temp_values = temp[:, 3] / 10.
temp_values

# Simple statistics:

# In[25]:

temp_values.min()

# In[26]:

temp_values.max()

# In[27]:

temp_values.mean()

# In[28]:

temp_values.std()

# In[29]:

temp_values.sum()

# You can also use the *sum* function:

# In[30]:

np.sum(temp_values)

# One can make operations on the subsets:

# ## Exercise
#
# Calculate mean for first 1000 values of temperature
#

# In[ ]:


# ## Saving data

# You can save your data as a text file

# In[31]:

np.savetxt('temp_only_values.csv', temp[:, 3] / 10., fmt='%.4f')

# [Python formatting options](https://pyformat.info/)

# Head of resulting file:

# In[32]:

get_ipython().system('head temp_only_values.csv')

# You can also save it as binary:

# In[33]:

# Open in binary mode ('wb'): tofile writes raw bytes, and the context
# manager guarantees the file is closed even if the write fails.
# Note that the unscaled column (not divided by 10) is written here.
with open('temp_only_values.bin', 'wb') as f:
    temp[:, 3].tofile(f)

# ## Exercises
#
# * Select and plot only data for October
# * Calculate monthly means for years from 1990 to 1999 and plot them