The exercises work as follows:
First come the Theano exercises, then one scan-specific exercise, then some exercises related to the LSTM example.
# Exercise 1
# This exercise asks you to create Theano variables and do some
# computation on them.
import numpy as np
from theano import function
raise NotImplementedError("TODO: add any other imports you need")
def make_scalar():
    """
    Returns a new Theano scalar.
    """
    raise NotImplementedError("TODO: implement this function.")
def log(x):
    """
    Returns the logarithm of a Theano scalar x.
    """
    raise NotImplementedError("TODO: implement this function.")
def add(x, y):
    """
    Adds two theano scalars together and returns the result.
    """
    raise NotImplementedError("TODO: implement this function.")
# The following code uses your code and tests it.
a = make_scalar()
b = make_scalar()
c = log(b)
d = add(a, c)
f = function([a, b], d)
a = np.cast[a.dtype](1.)
b = np.cast[b.dtype](2.)
actual = f(a, b)
expected = 1. + np.log(2.)
assert np.allclose(actual, expected)
print "SUCCESS!"
%load 01_scalar_soln.py
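For reference, one possible way to fill in the stubs above (a sketch only; the canonical answer is the one loaded from 01_scalar_soln.py):
from theano import tensor as T

def make_scalar():
    # a new symbolic floating-point scalar (default dtype)
    return T.scalar()

def log(x):
    # elementwise natural logarithm
    return T.log(x)

def add(x, y):
    # symbolic addition of the two scalars
    return x + y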
# Exercise 2
# This exercise asks you to create Theano variables and perform elemwise
# multiplication and a matrix/vector dot product.
import numpy as np
from theano import function
raise NotImplementedError("TODO: add any other imports you need")
def make_vector():
    """
    Returns a new Theano vector.
    """
    raise NotImplementedError("TODO: implement this function.")
def make_matrix():
    """
    Returns a new Theano matrix.
    """
    raise NotImplementedError("TODO: implement this function.")
def elemwise_mul(a, b):
    """
    a: A theano matrix
    b: A theano matrix
    Returns the elementwise product of a and b
    """
    raise NotImplementedError("TODO: implement this function.")
def matrix_vector_mul(a, b):
    """
    a: A theano matrix
    b: A theano vector
    Returns the matrix-vector product of a and b
    """
    raise NotImplementedError("TODO: implement this function.")
# The following code uses your code and tests it.
a = make_vector()
b = make_vector()
c = elemwise_mul(a, b)
d = make_matrix()
e = matrix_vector_mul(d, c)
f = function([a, b, d], e)
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(5).astype(a.dtype)
b_value = rng.rand(5).astype(b.dtype)
c_value = a_value * b_value
d_value = rng.randn(5, 5).astype(d.dtype)
expected = np.dot(d_value, c_value)
actual = f(a_value, b_value, d_value)
assert np.allclose(actual, expected)
print "SUCCESS!"
%load 02_vector_mat_soln.py
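A possible implementation sketch (the canonical answer is the one loaded from 02_vector_mat_soln.py):
from theano import tensor as T

def make_vector():
    return T.vector()

def make_matrix():
    return T.matrix()

def elemwise_mul(a, b):
    # * on tensor variables is elementwise multiplication
    return a * b

def matrix_vector_mul(a, b):
    # T.dot performs the matrix-vector product
    return T.dot(a, b)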
# Exercise 3
# This exercise asks you to create Theano tensor variables, perform
# broadcasted addition, and compute the max over part of a tensor.
import numpy as np
from theano import function
raise NotImplementedError("TODO: add any other imports you need")
def make_tensor(dim):
    """
    Returns a new Theano tensor with no broadcastable dimensions.
    dim: the total number of dimensions of the tensor.
    (You can use any dtype you like)
    """
    raise NotImplementedError("TODO: implement this function.")
def broadcasted_add(a, b):
    """
    a: a 3D theano tensor
    b: a 4D theano tensor
    Returns c, a 4D theano tensor, where
    c[i, j, k, l] = a[l, k, i] + b[i, j, k, l]
    for all i, j, k, l
    """
    raise NotImplementedError("TODO: implement this function.")
def partial_max(a):
    """
    a: a 4D theano tensor
    Returns b, a theano matrix, where
    b[i, j] = max_{k,l} a[i, k, l, j]
    for all i, j
    """
    raise NotImplementedError("TODO: implement this function.")
# The following code uses your code and tests it.
a = make_tensor(3)
b = make_tensor(4)
c = broadcasted_add(a, b)
d = partial_max(c)
f = function([a, b], d)
rng = np.random.RandomState([1, 2, 3])
a_value = rng.randn(2, 2, 2).astype(a.dtype)
b_value = rng.rand(2, 2, 2, 2).astype(b.dtype)
c_value = np.transpose(a_value, (2, 1, 0))[:, None, :, :] + b_value
expected = c_value.max(axis=1).max(axis=1)
actual = f(a_value, b_value)
assert np.allclose(actual, expected), (actual, expected)
print "SUCCESS!"
%load 03_tensor_soln.py
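A possible implementation sketch (the canonical answer is the one loaded from 03_tensor_soln.py; the dtype choice below is arbitrary):
from theano import tensor as T

def make_tensor(dim):
    # a TensorType with `dim` non-broadcastable dimensions; any dtype is allowed
    return T.TensorType(dtype='float32', broadcastable=(False,) * dim)()

def broadcasted_add(a, b):
    # dimshuffle(2, 'x', 1, 0) gives a tensor r with r[i, j, k, l] == a[l, k, i],
    # where the 'x' inserts a broadcastable axis for j, so r + b broadcasts correctly
    return a.dimshuffle(2, 'x', 1, 0) + b

def partial_max(a):
    # reduce over the two middle axes: b[i, j] = max_{k, l} a[i, k, l, j]
    return a.max(axis=1).max(axis=1)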
# Exercise 4
# This exercise asks you to compile a Theano function and call it to
# execute "x + y".
from theano import tensor as T
raise NotImplementedError("TODO: add any other imports you need")
def evaluate(x, y, expr, x_value, y_value):
    """
    x: A theano variable
    y: A theano variable
    expr: A theano expression involving x and y
    x_value: A numpy value
    y_value: A numpy value
    Returns the value of expr when x_value is substituted for x
    and y_value is substituted for y
    """
    raise NotImplementedError("TODO: implement this function.")
# The following code uses your code and tests it.
x = T.iscalar()
y = T.iscalar()
z = x + y
assert evaluate(x, y, z, 1, 2) == 3
print "SUCCESS!"
%load 04_function_soln.py
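A possible implementation sketch (the canonical answer is the one loaded from 04_function_soln.py):
from theano import function

def evaluate(x, y, expr, x_value, y_value):
    # compile a function mapping (x, y) to expr, then call it on the values
    return function([x, y], expr)(x_value, y_value)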
# Exercise 5
# This exercise makes you use shared variables. You must create them and
# update them by swapping the values of two shared variables.
import numpy as np
raise NotImplementedError("TODO: add any other imports you need")
def make_shared(shape):
    """
    Returns a theano shared variable containing a tensor of the specified
    shape.
    You can use any value you want.
    """
    raise NotImplementedError("TODO: implement the function")
def exchange_shared(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Uses get_value and set_value to swap the values stored in a and b
    """
    raise NotImplementedError("TODO: implement the function")
def make_exchange_func(a, b):
    """
    a: a theano shared variable
    b: a theano shared variable
    Returns f
    where f is a theano function, that, when called, swaps the
    values in a and b
    f should not return anything
    """
    raise NotImplementedError("TODO: implement the function")
# The following code uses your code and tests it.
a = make_shared((5, 4, 3))
assert a.get_value().shape == (5, 4, 3)
b = make_shared((5, 4, 3))
assert b.get_value().shape == (5, 4, 3)
a.set_value(np.zeros((5, 4, 3), dtype=a.dtype))
b.set_value(np.ones((5, 4, 3), dtype=b.dtype))
exchange_shared(a, b)
assert np.all(a.get_value() == 1.)
assert np.all(b.get_value() == 0.)
f = make_exchange_func(a, b)
rval = f()
assert isinstance(rval, list)
assert len(rval) == 0
assert np.all(a.get_value() == 0.)
assert np.all(b.get_value() == 1.)
print "SUCCESS!"
%load 05_shared_soln.py
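A possible implementation sketch (the canonical answer is the one loaded from 05_shared_soln.py):
import numpy as np
import theano

def make_shared(shape):
    # any initial value works; zeros of the requested shape is the simplest
    return theano.shared(np.zeros(shape, dtype=theano.config.floatX))

def exchange_shared(a, b):
    # plain Python swap through get_value/set_value
    temp = a.get_value()
    a.set_value(b.get_value())
    b.set_value(temp)

def make_exchange_func(a, b):
    # both updates read the old values, so calling the function swaps a and b
    return theano.function([], [], updates=[(a, b), (b, a)])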
# Exercise 6
# This exercise makes use of Theano's symbolic grad.
from theano import tensor as T
def grad_sum(x, y, z):
    """
    x: A theano variable
    y: A theano variable
    z: A theano expression involving x and y
    Returns dz / dx + dz / dy
    """
    raise NotImplementedError("TODO: implement this function.")
# The following code uses your code and tests it.
x = T.scalar()
y = T.scalar()
z = x + y
s = grad_sum(x, y, z)
assert s.eval({x: 0, y: 0}) == 2
print "SUCCESS!"
%load 06_grad_soln.py
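A possible implementation sketch (the canonical answer is the one loaded from 06_grad_soln.py):
from theano import tensor as T

def grad_sum(x, y, z):
    # T.grad returns one gradient expression per variable in the wrt list
    dx, dy = T.grad(z, [x, y])
    return dx + dy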
# Exercise 7
# This code has a bug. Run this cell to see it.
# Use a Theano flag (easy in a shell, harder in IPython) or an extra parameter
# to theano.function to find the cause and fix it.
# Do not find the bug by inspecting the code. This is practice for finding bugs
# in more complicated cases where code inspection does not work well.
import numpy as np
from theano import function
from theano import tensor as T
a = T.vector()
b = T.log(a)
c = T.nnet.sigmoid(b)
d = T.sqrt(c)
e = T.concatenate((d, c), axis=0)
f = b * c * d
g = e + f
h = g / c
fn = function([a], h)
fn(np.ones((3,)).astype(a.dtype))
%load 07_mode.py
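As a hint on the debugging tools (a sketch of the approach, not the fix itself): weakening or disabling the graph optimizations usually makes the error message point closer to the op that introduced the problem. For example:
# In a shell, set the flags before starting Python/IPython, e.g.:
#   THEANO_FLAGS='optimizer=None,exception_verbosity=high' python myscript.py
# Inside IPython, a similar effect comes from the extra `mode` parameter of
# theano.function, which applies fewer graph optimizations:
fn = function([a], h, mode='FAST_COMPILE')
fn(np.ones((3,)).astype(a.dtype))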
# Exercise 8
# This exercise is different: the initial version works.
# You must modify it as described below, and it should still give the same output.
# Modify and execute the polynomial example so that the reduction (the sum() call) is done by scan.
import numpy
import theano
import theano.tensor as T
theano.config.warn.subtensor_merge_bug = False
coefficients = theano.tensor.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000
# Generate the components of the polynomial
full_range = theano.tensor.arange(max_coefficients_supported)
components, updates = theano.scan(fn=lambda coeff, power, free_var:
                                  coeff * (free_var ** power),
                                  outputs_info=None,
                                  sequences=[coefficients, full_range],
                                  non_sequences=x)
polynomial = components.sum()
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                       outputs=polynomial)
test_coeff = numpy.asarray([1, 0, 2], dtype=numpy.float32)
print calculate_polynomial(test_coeff, 3)
# 19.0
%load 08_scan_polynomial_soln.py
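A possible rewrite (a sketch only; the canonical answer is the one loaded from 08_scan_polynomial_soln.py): keep a running sum as the recurrent output of scan, so the reduction itself is done by scan, and the polynomial value is the last element of that running sum.
components_sum, updates = theano.scan(
    fn=lambda coeff, power, prior_sum, free_var:
        prior_sum + coeff * (free_var ** power),
    outputs_info=numpy.asarray(0., dtype=theano.config.floatX),  # running sum starts at 0
    sequences=[coefficients, full_range],
    non_sequences=x)
polynomial = components_sum[-1]
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                       outputs=polynomial)
print calculate_polynomial(test_coeff, 3)
# 19.0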
LSTM Exercises
1) Modify the LSTM: reverse the input sequence and try it like that, as in
Sutskever-NIPS2014. (No change to the Theano code, but useful for understanding how to do 2.)
2) Modify the LSTM to have 2 LSTM layers. The new one takes
the input in reverse order. Then concatenate the means
of the outputs of both LSTMs as the input to the logistic regression.
3) Modify the LSTM: add the V_o parameter and use it. (No solution provided.)
Note: 2) needs more epochs before we start to see that it learns something. With max_epochs=16, we start to see it for all versions.
You can load the original example code in the next cell.
Run it once. It will load the data.
At the end of that code, there is, in a comment, an example of how to run it for a short time (~10 minutes on my laptop, Core i5).
During that time, we see that it starts to learn, but I do not let it run longer for this tutorial.
%load lstm.py
%load lstm_reverse.diff
%load lstm_double.diff