%pylab inline
%load_ext autoreload
%autoreload 2
import models as m
import visualization as v
import layers as l
import trainers as t
import utils
import time
import numpy as np
from dataset import load_dataset, mnist
Populating the interactive namespace from numpy and matplotlib
# Load MNIST images (28x28 pixels, flattened to vectors of length 784). Array shape is [# examples, 784]
mnist_data = mnist()
print(mnist_data.shape)
(50000, 784)
Only 36 hidden units, so samples are not nearly as clear as those from more complex models (see the PCD model with 500 hidden units below).
# CD-1 RBM: 784 visible units, 36 hidden units, with weight decay regularization
cd1 = m.BinaryRBM(784, 36, t.CD_model(1), ordered_trainers=[t.WeightDecay(0.1)])
cd1.train(lr=0.1, epoch=15, batch_size=20, data=mnist_data, lr_schedule=t.lr_constant, checkpoint=1)
Epoch 1: 5.57390213013 | 0.188208092684
Epoch 2: 5.55866003036 | 0.174009810673
Epoch 3: 5.52248597145 | 0.171164946487
Epoch 4: 5.53973603249 | 0.169069583248
Epoch 5: 5.61135911942 | 0.167298398653
Epoch 6: 5.61759305 | 0.165897242254
Epoch 7: 5.55936789513 | 0.164859603253
Epoch 8: 5.5711889267 | 0.164098081171
Epoch 9: 5.51164412498 | 0.16366337216
Epoch 10: 5.5558860302 | 0.163404337451
Epoch 11: 5.51408195496 | 0.16321610836
Epoch 12: 5.72841215134 | 0.163006684909
Epoch 13: 5.80656695366 | 0.162835821116
Epoch 14: 5.61577701569 | 0.162607212148
Epoch 15: 5.55477190018 | 0.162390080779
Total time: 83.848457098
# Plot bias and weight matrices
v.plot_images(cd1.connections[0].W, (28, 28), (6, 6), space=1, size=(5, 5))
v.show_image(cd1.layers[0].bias, dim=(28,28))
Sampling from a trained model consists of creating a sampler, say G, by calling "model.dream" with starting data and the number of Gibbs steps to take between samples. Calling next(G) then returns the next sample.
In this example, samples are drawn from the just-trained RBM "cd1".
CD-1 training does not produce very good samples in this case.
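Under the hood, "dream" is presumably just blocked Gibbs sampling wrapped in a generator. A minimal sketch of how such a sampler could look for a binary RBM; the names W, b_v, b_h and the [visible x hidden] weight layout are assumptions, not this library's actual internals:
# Hypothetical sketch of a dream-style sampler for a binary RBM.
# Assumes W has shape [visible, hidden] with biases b_v, b_h; these are
# illustrative names, not this library's real internals.
def dream_sketch(W, b_v, b_h, start, steps):
    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
    v = start.copy()
    while True:
        for _ in range(steps):
            # One blocked Gibbs step: sample hiddens given visibles,
            # then visibles given hiddens
            h = (np.random.rand(W.shape[1]) < sigmoid(np.dot(v, W) + b_h)).astype(float)
            v = (np.random.rand(W.shape[0]) < sigmoid(np.dot(W, h) + b_v)).astype(float)
        yield v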
# Generate 10 samples for each of 5 different starting examples
for start in mnist_data[np.random.randint(0, len(mnist_data), 5)]:
    # Initialize generator, 1000 gibbs steps between samples
    G = cd1.dream(start, steps=1000)
    im = [next(G) for i in range(10)]  # Generate 10 samples
    v.plot_images([start] + im, (28, 28), (1, 11), space=1, size=(10, 10))
Models have a few basic parts: layers, connections, statistics, and trainers.
The two model trainers used here, CD_model and PCD_model, both calculate the gradient given the data-dependent and model-dependent statistics.
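For a binary RBM, the log-likelihood gradient with respect to W is the difference between data-driven and model-driven statistics, <v h>_data - <v h>_model; CD-k and PCD-k differ only in where the negative-phase Gibbs chain starts. A rough sketch under those assumptions (hypothetical names, not this library's API):
# Rough sketch of the CD-k / PCD-k weight gradient for a binary RBM.
# All names (W, b_v, b_h, weight_gradient) are hypothetical, not this library's API.
def weight_gradient(W, b_v, b_h, v_data, v_neg, k):
    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
    # Positive phase: hidden probabilities driven by the data
    h_data = sigmoid(np.dot(v_data, W) + b_h)
    # Negative phase: k Gibbs steps starting from v_neg.
    # CD-k starts at the data (v_neg = v_data); PCD-k starts at the
    # persistent chain state carried over from the previous update.
    v = v_neg
    for _ in range(k):
        h = (np.random.rand(*h_data.shape) < sigmoid(np.dot(v, W) + b_h)).astype(float)
        v = sigmoid(np.dot(h, W.T) + b_v)  # mean-field visible update
    h_model = sigmoid(np.dot(v, W) + b_h)
    grad = (np.dot(v_data.T, h_data) - np.dot(v.T, h_model)) / len(v_data)
    return grad, v  # return the chain state so PCD can persist it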
This model produces much higher-quality samples than the small CD-1 RBM above (500 hidden units instead of 36, and PCD's persistent negative chains give a better estimate of the model statistics; the mechanical difference from CD is sketched below). It also takes significantly longer to train, since every weight update now involves a 784x500 weight matrix rather than a 784x36 one.
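In code, the CD/PCD difference is just a few lines: PCD keeps its negative-phase chain ("fantasy particles") alive across minibatches instead of restarting it at the data. A hypothetical loop fragment, reusing the weight_gradient sketch above (W, b_v, b_h are again assumed arrays):
# Hypothetical training-loop fragment showing the only mechanical CD/PCD
# difference (reuses the weight_gradient sketch above; W, b_v, b_h assumed).
v_chain = (np.random.rand(20, 784) < 0.5).astype(float)  # persistent fantasy particles
for i in range(0, len(mnist_data), 20):
    batch = mnist_data[i:i + 20]
    # CD-1: the negative chain restarts at the data on every update
    grad_cd, _ = weight_gradient(W, b_v, b_h, batch, batch, 1)
    # PCD-1: the negative chain resumes from where it stopped last update
    grad_pcd, v_chain = weight_gradient(W, b_v, b_h, batch, v_chain, 1)
    W += 0.1 * grad_pcd  # lr=0.1, matching the cells in this notebook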
# PCD-1 RBM: 784 visible units, 500 hidden units
pcd1 = m.BinaryRBM(784, 500, t.PCD_model(1))
pcd1.train(lr=0.1, epoch=15, batch_size=20, data=mnist_data, checkpoint=1)
Epoch 1: 72.8314359188 | 0.150373788722
Epoch 2: 62.3414239883 | 0.133895184853
Epoch 3: 57.6334960461 | 0.130842071181
Epoch 4: 71.0778319836 | 0.129144292264
Epoch 5: 84.4407529831 | 0.127328086893
Epoch 6: 79.4299650192 | 0.126022192216
Epoch 7: 90.3721528053 | 0.124596743171
Epoch 8: 62.2018380165 | 0.123527431247
Epoch 9: 60.0852479935 | 0.122127699642
Epoch 10: 58.6974749565 | 0.121009460411
Epoch 11: 59.4325830936 | 0.119887508055
Epoch 12: 62.647274971 | 0.118416723056
Epoch 13: 69.0628449917 | 0.117480424665
Epoch 14: 63.0048360825 | 0.116180068465
Epoch 15: 59.6218490601 | 0.115109754428
Total time: 1012.88859296
# Plot bias and weight matrices
v.plot_images(pcd1.connections[0].W, (28, 28), (10, 10), space=1, size=(10, 10))
v.show_image(pcd1.layers[0].bias, dim=(28,28))
for start in mnist_data[np.random.randint(0, len(mnist_data), 10)]:
    # 1000 gibbs steps between samples
    g = pcd1.dream(start, steps=1000)
    im = [next(g) for i in range(10)]
    v.plot_images([start] + im, (28, 28), (1, 11), space=1, size=(7, 7))
When only 5 Gibbs steps are taken between samples, it is easier to see how the samples slowly change shape.
for start in mnist_data[np.random.randint(0, len(mnist_data), 3)]:
    # 5 gibbs steps between samples
    g = pcd1.dream(start, steps=5)
    im = [next(g) for i in range(99)]
    v.plot_images([start] + im, (28, 28), (5, 20), space=1, size=(14, 14))