Tutorial Setup

Check your install

In [ ]:
import numpy
In [ ]:
import scipy
In [ ]:
import matplotlib
In [ ]:
import sklearn
In [ ]:
import psutil
In [ ]:
import pandas
In [ ]:
# The parallel computing machinery was split out of IPython 4.0 into the
# standalone `ipyparallel` package. Try the new name first and fall back to
# the legacy location so the check passes on both old and new installs.
try:
    import ipyparallel
except ImportError:
    import IPython.parallel

Finding the location of an installed package and its version:

In [ ]:
# A package's __path__ lists the directories it was loaded from.
numpy.__path__
In [ ]:
# Version string of the installed NumPy.
numpy.__version__

Check that you have the datasets

In [ ]:
# Execute the fetch script located in the parent of the working directory.
%run ../fetch_data.py
# Alternative invocation with explicit arguments, left disabled
# (presumably the names of the datasets to fetch; confirm against fetch_data.py):
# %run ../fetch_data.py twenty_newsgroups sentiment140 covertype
In [ ]:
import os

# os.listdir returns entries in arbitrary order; sorting keeps the printed
# listing identical from one run (and one machine) to the next.
for fname in sorted(os.listdir('../datasets/')):
    print(fname)

A NumPy primer

NumPy array dtypes and shapes

In [ ]:
import numpy as np
In [ ]:
# 1-D array built from a flat Python list.
a = np.array([1, 2, 3])
In [ ]:
a
In [ ]:
# 2-D array built from a nested list: two rows of three values each.
b = np.array([[0, 2, 4], [1, 3, 5]])
In [ ]:
b
In [ ]:
# shape is a tuple with one entry per axis; for b that is (2, 3).
b.shape
In [ ]:
# dtype is the element type NumPy inferred from the input values.
b.dtype
In [ ]:
# a has a single axis, so its shape is the one-element tuple (3,).
a.shape
In [ ]:
a.dtype
In [ ]:
# No dtype given: zeros() falls back to its default floating-point type.
np.zeros(5)
In [ ]:
# Both the shape and the element type can be passed explicitly.
np.ones(shape=(3, 4), dtype=np.int32)

Common array operations

In [ ]:
# Multiplying by a Python float is applied element-wise and gives a float array.
c = b * 0.5
In [ ]:
c
In [ ]:
c.shape
In [ ]:
c.dtype
In [ ]:
a
In [ ]:
# Broadcasting: a (3 values) is added to each of the two rows of c.
d = a + c
In [ ]:
d
In [ ]:
# A single index selects along the first axis: the first row.
d[0]
In [ ]:
# Row 0, column 0: a single scalar element.
d[0, 0]
In [ ]:
# Every row, column 0: the first column as a 1-D array.
d[:, 0]
In [ ]:
# With no axis argument the reduction runs over all elements.
d.sum()
In [ ]:
d.mean()
In [ ]:
# axis=0 collapses the rows, leaving one sum per column.
d.sum(axis=0)
In [ ]:
# axis=1 collapses the columns, leaving one mean per row.
d.mean(axis=1)

Reshaping and in-place update

In [ ]:
# The integers 0 through 11 as a 1-D array.
e = np.arange(12)
In [ ]:
e
In [ ]:
# The same 12 values arranged as 3 rows by 4 columns.
f = e.reshape(3, 4)
In [ ]:
f
In [ ]:
e
In [ ]:
# Slice assignment writes into e's existing buffer rather than creating a new array.
e[5:] = 0
In [ ]:
e
In [ ]:
# f is displayed again to check whether it picked up the change made through e
# (reshape returns a view of the same data whenever it can).
f

Combining arrays

In [ ]:
a
In [ ]:
b
In [ ]:
d
In [ ]:
# Join 1-D arrays end to end: three copies of a give 9 values.
np.concatenate([a, a, a])
In [ ]:
# Stack along rows; a is treated as a single row, so the result has 1 + 2 + 2 = 5 rows.
np.vstack([a, b, d])
In [ ]:
# Stack side by side along columns: 3 + 3 = 6 columns, still 2 rows.
np.hstack([b, d])

A Matplotlib primer

In [ ]:
# Render figures inline in the notebook output.
%matplotlib inline
In [ ]:
import matplotlib.pyplot as plt
In [ ]:
# 10 evenly spaced values from 0 to 2, both endpoints included.
x = np.linspace(0, 2, 10)
In [ ]:
x
In [ ]:
# With a single data argument the values are plotted against their index;
# the trailing semicolon hides the returned list of line objects.
plt.plot(x, 'o-');
In [ ]:
# Two curves drawn on the same axes; the labels are what the legend displays.
plt.plot(x, x, 'o-', label='linear')
plt.plot(x, x ** 2, 'x-', label='quadratic')

# loc='best' lets matplotlib choose the legend position.
plt.legend(loc='best')
plt.title('Linear vs Quadratic progression')
plt.xlabel('Input')
# Trailing semicolon suppresses the text repr of the returned label object.
plt.ylabel('Output');
In [ ]:
# Seed NumPy's legacy global RNG so this cell produces the same draws every
# time it runs (including on Restart & Run All), keeping the outputs and
# figures derived from `samples` reproducible.
np.random.seed(0)
# 1000 draws from a normal distribution with mean 1.0 and standard deviation 0.5.
samples = np.random.normal(loc=1.0, scale=0.5, size=1000)
In [ ]:
samples.shape
In [ ]:
samples.dtype
In [ ]:
# Peek at the first 30 draws only, rather than dumping the whole array.
samples[:30]
In [ ]:
# Histogram of the draws using 50 equal-width bins.
plt.hist(samples, bins=50);
In [ ]:
# Seed NumPy's legacy global RNG so both draws are identical every time this
# cell runs; without it the values change on each execution.
np.random.seed(1)
# 10000 draws from a normal distribution (mean 1, standard deviation 0.5) ...
samples_1 = np.random.normal(loc=1, scale=.5, size=10000)
# ... and 10000 draws from a Student's t distribution with 10 degrees of freedom.
samples_2 = np.random.standard_t(df=10, size=10000)
In [ ]:
# 50 edges spanning [-3, 3] define 49 shared bins, so both histograms line up.
bins = np.linspace(-3, 3, 50)
# alpha=0.5 keeps the overlapping bars see-through; assigning to _ discards
# the value returned by hist.
_ = plt.hist(samples_1, bins=bins, alpha=0.5, label='samples 1')
_ = plt.hist(samples_2, bins=bins, alpha=0.5, label='samples 2')
plt.legend(loc='upper left');
In [ ]:
# One point per (samples_1[i], samples_2[i]) pair; the low alpha makes dense
# regions appear darker than sparse ones.
plt.scatter(samples_1, samples_2, alpha=0.1);
In [ ]:
 
In [ ]: