#!/usr/bin/env python
# coding: utf-8

# ## Convolutional Neural Network (CNN)
# In this notebook we show how to do the classification using a simple CNN. First we load the data and the necessary libraries. As in the previous notebook, we could also load the whole data set.

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import matplotlib.image as imgplot
import cPickle as pickle
import gzip
with gzip.open('mnist_4000.pkl.gz', 'rb') as f:
    (X, y) = pickle.load(f)
PIXELS = len(X[0,0,0,:])
X.shape, y.shape, PIXELS


# In[5]:

#from create_mnist import load_data_2d
#X,y,PIXELS = load_data_2d('/home/dueo/dl-playground/data/mnist.pkl.gz')
#X.shape, y


# X contains the images and y contains the labels.

# ### A first simple CNN
# Now let's train a network on the loaded data. First we have to design the architecture of the network.

# #### Definition of the network
# We again use the simple definition via the class `NeuralNet` from `nolearn.lasagne` to create a network like this:

# In[2]:

from lasagne import layers
from lasagne import nonlinearities
from nolearn.lasagne import NeuralNet

net1 = NeuralNet(
    # Geometry of the network
    layers=[
        ('input', layers.InputLayer),
        ('conv1', layers.Conv2DLayer),
        ('pool1', layers.MaxPool2DLayer),
        ('conv2', layers.Conv2DLayer),
        ('pool2', layers.MaxPool2DLayer),
        ('hidden4', layers.DenseLayer),
        ('output', layers.DenseLayer),
    ],
    input_shape=(None, 1, PIXELS, PIXELS),  # None in the first axis indicates that the batch size can be set later
    conv1_num_filters=32, conv1_filter_size=(3, 3), pool1_pool_size=(2, 2),  # pool_size used to be called ds in old versions of lasagne
    conv2_num_filters=64, conv2_filter_size=(2, 2), pool2_pool_size=(2, 2),
    hidden4_num_units=500,
    output_num_units=10, output_nonlinearity=nonlinearities.softmax,

    # Learning rate parameters
    update_learning_rate=0.01,
    update_momentum=0.9,
    regression=False,
    # We only train for 10 epochs
    max_epochs=10,
    verbose=1,

    # Training / test-set split
    eval_size=0.2
)


# #### Training of the net
# As in the MLP example, the data is automatically split into 80% training set and 20% validation set (controlled by `eval_size=0.2`). Since it takes quite a while to finish an epoch (at least on a CPU), we reduce the data to 1000 samples (800 for training and 200 for validation). Note also that the geometry makes sense: the first 3x3 convolution knocks off 2 pixels from the 28x28 images, resulting in 26x26 images. The 2x2 max pooling then reduces these to 13x13 pixels, the 2x2 convolution to 12x12, and the final 2x2 max pooling to 6x6.

# In[3]:

net = net1.fit(X[0:1000,:,:,:], y[0:1000])


# Note that this takes a bit of time on a CPU (approx. 7 sec per epoch). Running on a GPU, an epoch only takes about 0.2 sec.

# We now have a trained classifier with which we can make predictions.

# In[4]:

net.predict(X[3000:3010,:,:,:])


# **That's basically all we need!** We can make predictions on new data.
# In the following we show how to store and reload the learned model. The reloaded model can then be trained further.

# ##### Storing the trained model
# We store the trained model using the pickle mechanism as follows:

# In[5]:

import cPickle as pickle
with open('data/net1.pickle', 'wb') as f:
    pickle.dump(net, f, -1)


# In[98]:

get_ipython().run_line_magic('ls', '-rtlh data')


# #### Loading a stored model
# We now load the trained model again...

# In[6]:

import cPickle as pickle
with open('data/net1.pickle', 'rb') as f:
    net_pretrain = pickle.load(f)


# #### Training further (more iterations)
# We can now take the net and train it for further iterations. We will see that the training loss already starts at the low value reached by the previous model, so the model really has been reloaded.

# In[7]:

net_pretrain.fit(X[0:1000,:,:,:], y[0:1000]);
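# Another way to double-check that the reload worked is to load a second, fresh copy of the pickle and compare its parameter arrays with those of `net`, which we have not trained since saving. This is a minimal sanity-check sketch; the name `net_check` is introduced here just for illustration.

# In[ ]:

import numpy as np
import cPickle as pickle
with open('data/net1.pickle', 'rb') as f:
    net_check = pickle.load(f)
# Every stored parameter array should match the corresponding one in `net`
for w_saved, w_loaded in zip(net.get_all_params(), net_check.get_all_params()):
    assert np.allclose(w_saved.get_value(), w_loaded.get_value())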
# #### Training further (new data)
# We can also train on new data, now for 5 epochs...

# In[8]:

net_pretrain.max_epochs = 5
net_pretrain.fit(X[1000:2000,:,:,:], y[1000:2000]);


# ### Evaluate the model
# We now make predictions on unseen data; we have trained only on the images 0-1999.

# In[9]:

toTest = range(3001, 3026)
preds = net1.predict(X[toTest,:,:,:])
preds


# Let's look at the corresponding images.

# In[10]:

fig = plt.figure(figsize=(10,10))
for i, num in enumerate(toTest):
    a = fig.add_subplot(5, 5, (i+1))  # NB: subplot indices are one-based!
    plt.axis('off')
    a.set_title(str(preds[i]) + " (" + str(y[num]) + ")")
    plt.imshow(-X[num,0,:,:], interpolation='none', cmap=plt.get_cmap('gray'))


# ## Miscellaneous

# ### Accessing the weights of the network
# To calculate the number of weights in the network, we have to take the following layers into account:
#
# 1. First convolutional layer: 32x3x3 + 32 = 320 (32 filters of size 3x3 plus 32 biases)
# 2. Second convolutional layer: this layer goes from 32 to 64 feature maps using 32*64 = 2048 kernels of size 2x2. Altogether, 32x64x2x2 + 64 = 8256 weights are used.
# 3. Fully connected layer (hidden4): this layer contains 500 nodes, which connect to the 64 6x6 feature maps of the last pooling layer. Hence, we have 500 x 6 x 6 x 64 + 500 = 1152500 weights.
# 4. Output layer: the 500 nodes of hidden4 are then fully connected to the 10 output nodes reflecting the 10 classes. Together with the biases, we have 500 x 10 + 10 = 5010 weights.
#
# So altogether this toy model already has about 1.2 million parameters, far more than we have examples (a nightmare in classical statistics). Modern architectures like "Oxford Net" have more than 100 million parameters. The weights can be obtained as follows (the biases are the one-dimensional entries).

# In[12]:

import operator
import numpy as np
weights = [w.get_value() for w in net.get_all_params()]
numParas = 0
for i, weight in enumerate(weights):
    n = reduce(operator.mul, np.shape(weight))
    print(str(i) + "  " + str(np.shape(weight)) + "  " + str(n))
    numParas += n
print("Number of parameters " + str(numParas))


# ### Visualizing the weights
#
# The 32 3x3 filters of the first convolutional layer can be visualized as follows.

# In[13]:

conv = net.get_all_params()
ws = conv[7].get_value()  # use the index of the '(32, 1, 3, 3)' / '288' entry from the printout above
fig = plt.figure(figsize=(6,6))
for i in range(0, 32):
    a = fig.add_subplot(6, 6, (i+1))  # NB: subplot indices are one-based!
    plt.axis('off')
    plt.imshow(ws[i,0,:,:], interpolation='none', cmap=plt.get_cmap('gray'))
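# The filters of the second convolutional layer can be inspected in the same way. A minimal sketch: each of its 64 filters has 32 input channels (shape (64, 32, 2, 2)), so we average over the channels to get a single 2x2 image per filter. Given the parameter ordering implied by `conv[7]` above, the conv2 weights should sit at index 5, but this is an assumption; pick whichever entry of the printout has shape (64, 32, 2, 2).

# In[ ]:

ws2 = conv[5].get_value()  # assumed index; the shape should be (64, 32, 2, 2)
fig = plt.figure(figsize=(8,8))
for i in range(0, 64):
    a = fig.add_subplot(8, 8, (i+1))  # NB: subplot indices are one-based!
    plt.axis('off')
    # Average the 32 input channels to get one 2x2 image per filter
    plt.imshow(ws2[i].mean(axis=0), interpolation='none', cmap=plt.get_cmap('gray'))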