#!/usr/bin/env python
# coding: utf-8
# # NumPy arrays
# Nikolay Koldunov
#
# koldunovn@gmail.com
# ================
#
# - a powerful N-dimensional array object
# - sophisticated (broadcasting) functions
# - tools for integrating C/C++ and Fortran code
# - useful linear algebra, Fourier transform, and random number capabilities
#
# In[1]:
import numpy as np
# `get_ipython` is neither defined nor imported in this file; it is supplied by
# the IPython/Jupyter runtime, so this line only works when run from there.
get_ipython().run_line_magic('matplotlib', 'inline')
# In[2]:
# Cosmetic only: print arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)
# ## Load data
# Load data into a variable:
# In[3]:
# Read the text file into the array `temp`.
temp = np.loadtxt('Ham_3column.txt')
# In[4]:
# A bare name at the end of a notebook cell displays its value.
temp
# In[5]:
# Length of the array along each dimension.
temp.shape
#
# In[6]:
# Total number of elements.
temp.size
# So it's *row-major* order. Matlab and Fortran use *column-major* order for arrays.
# In[7]:
type(temp)
# NumPy arrays are statically typed, which allows faster operations
# In[8]:
# Element type shared by every entry of the array.
temp.dtype
# You can't assign a value of a different type to an element of a NumPy array:
# In[9]:
# Deliberate failure: the string 'Year' cannot be converted to the array's
# numeric element type, so NumPy raises ValueError. The error is caught and
# printed so the lesson stays visible while the rest of the file keeps running
# when it is executed top to bottom as a script.
try:
    temp[0,0] = 'Year'
except ValueError as err:
    print('ValueError:', err)
# Slicing works similarly to Matlab:
# In[10]:
# First five rows.
temp[0:5,:]
# In[11]:
# Last five rows. The stop index is left open on purpose: a stop of -1 would
# exclude the final row and return only four rows.
temp[-5:,:]
# One can look at the data. This is done with the matplotlib module:
# In[12]:
# Import pyplot directly: matplotlib.pylab is a legacy namespace that also pulls
# in NumPy names, and the matplotlib documentation discourages its use.
import matplotlib.pyplot as plt
# Plot every value of column index 3 against its row number.
plt.plot(temp[:,3])
# ## Index slicing
# In general it is similar to Matlab
# First 12 elements of the **third** column (days, column index 2). Remember that indexing starts with 0:
# In[13]:
temp[0:12,2]
# First row:
# In[14]:
temp[0,:]
# ## Exercise
#
# - Plot only first 1000 values
# - Plot last 1000 values
#
# In[ ]:
# In[ ]:
# In[ ]:
# We can create a mask selecting all rows where the value in the third column (days) equals 10:
# In[15]:
# Boolean array that is True for every row whose column index 2 equals 10.
mask = temp[:, 2] == 10
# Here we apply this mask and show only the first 5 rows of the result:
# In[16]:
# The slice stops at 5 so the output matches the "first 5 rows" stated above.
temp[mask][:5,:]
# You don't have to create a separate variable for the mask; you can apply it directly. Here, instead of the first five rows, I show the last five rows:
# In[17]:
# Inline mask on column index 2, then keep the last five matching rows.
temp[temp[:, 2] == 10][-5:]
# Conditions can be combined with `&`. Here we select rows where column index 2 is between 10 and 12 inclusive (only the first 10 matching rows are shown):
# In[18]:
temp[(temp[:, 2] >= 10) & (temp[:, 2] <= 12)][:10]
# ## Exercise
#
# Select only summer months
# Select only first half of the year
#
# In[ ]:
# ## Basic operations
# Create an example array from the first 12 values of the third column (days, column index 2) and perform some basic operations:
# In[19]:
days = temp[0:12,2]
days
# In[20]:
# Add a scalar to every element.
days+10
# In[21]:
# Multiply every element by a scalar.
days*20
# In[22]:
# Element-wise product of the array with itself.
days*days
# What's wrong with this figure?
# In[23]:
# First 100 values of column index 3, plotted exactly as loaded from the file.
plt.plot(temp[:100,3])
# ## Exercise
#
# - Create new array that will contain only temperatures
#
# - Convert temperature to deg C
#
# - Convert all temperatures to deg F
#
# In[ ]:
# ## Basic statistics
# Create *temp_values* that will contain only data values:
# In[24]:
# Column index 3 divided by ten; the bare name on the next line displays the result.
temp_values = temp[:, 3] / 10.0
temp_values
# Simple statistics:
# In[25]:
# Smallest value.
temp_values.min()
# In[26]:
# Largest value.
temp_values.max()
# In[27]:
# Arithmetic mean.
temp_values.mean()
# In[28]:
# Standard deviation.
temp_values.std()
# In[29]:
# Sum of all values, using the array method.
temp_values.sum()
# You can also use the *sum* function:
# In[30]:
np.sum(temp_values)
# One can make operations on the subsets:
# ## Exercise
#
# Calculate mean for first 1000 values of temperature
#
# In[ ]:
# ## Saving data
# You can save your data as a text file
# In[31]:
# Write column index 3, divided by ten, to a text file with four decimal places per value.
np.savetxt('temp_only_values.csv', temp[:, 3] / 10.0, fmt='%.4f')
# [Python formatting options](https://pyformat.info/)
# Head of resulting file:
# In[32]:
# Run the shell command `head` through IPython to show the first lines of temp_only_values.csv.
get_ipython().system('head temp_only_values.csv')
# You can also save it as binary:
# In[33]:
# Raw array bytes need a binary-mode handle ('wb' rather than 'w'). The `with`
# block closes the file even if the write fails.
with open('temp_only_values.bin', 'wb') as f:
    temp[:,3].tofile(f)
# ## Exercises
#
# * Select and plot only data for October
# * Calculate monthly means for years from 1990 to 1999 and plot them