from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.datasets import mnist
from keras.utils import np_utils
from keras import initializations
def init_weights(shape, name=None):
    """Build an initial weight variable using a normal initializer.

    Thin wrapper around ``keras.initializations.normal`` that pins the
    ``scale`` argument to 0.01 so it can be handed to a layer as its
    ``init`` callable.

    Args:
        shape: Shape of the weight variable; forwarded unchanged.
        name: Optional variable name; forwarded unchanged.

    Returns:
        The value returned by ``initializations.normal`` for these arguments.
    """
    return initializations.normal(shape, scale=0.01, name=name)
# Output: Using TensorFlow backend.
from keras.utils.visualize_util import plot
# ---- Training hyper-parameters ----
nb_epoch = 10
batch_size = 128

# ---- MNIST dataset parameters ----
nb_classes = 10
img_rows = img_cols = 28

# ---- LSTM network parameters ----
# img_rows is used as the number of time steps and img_cols as the size of
# the input vector presented at each step.
nb_lstm_outputs = 30
nb_time_steps, dim_input_vector = img_rows, img_cols
input_shape = (nb_time_steps, dim_input_vector)

# ---- Load the MNIST dataset ----
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print('X_train original shape:', X_train.shape)

# Turn the label vectors into categorical matrices with nb_classes columns.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Cast the pixel data to float32 and divide by 255.
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# Output:
#   ('X_train original shape:', (60000, 28, 28))
#   ('X_train shape:', (60000, 28, 28))
#   (60000, 'train samples')
#   (10000, 'test samples')
# Assemble the network in one expression: a single LSTM layer that reads the
# (nb_time_steps, dim_input_vector) input, followed by a softmax Dense layer
# with one unit per class whose weights come from init_weights.
model = Sequential([
    LSTM(nb_lstm_outputs, input_shape=input_shape),
    Dense(nb_classes, activation='softmax', init=init_weights),
])

# Print the layer-by-layer overview of the assembled model.
model.summary()
# Output:
# ____________________________________________________________________________________________________
# Layer (type)                     Output Shape          Param #     Connected to
# ====================================================================================================
# lstm_1 (LSTM)                    (None, 30)            7080        lstm_input_1[0][0]
# ____________________________________________________________________________________________________
# dense_1 (Dense)                  (None, 10)            310         lstm_1[0][0]
# ====================================================================================================
# Total params: 7390
# ____________________________________________________________________________________________________
# Write a diagram of the model to lstm_model.png.
plot(model, to_file='lstm_model.png')

# Configure training: rmsprop optimizer, categorical cross-entropy loss,
# and accuracy reported as an extra metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy']
)

# Fit on the training data; the object returned by fit() is kept in `history`.
history = model.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    verbose=1,
    shuffle=True
)
# Output:
#   Epoch 1/10
#   60000/60000 [==============================] - 28s - loss: 1.3082 - acc: 0.6124
#   Epoch 2/10
#   60000/60000 [==============================] - 28s - loss: 0.5380 - acc: 0.8467
#   Epoch 3/10
#   60000/60000 [==============================] - 28s - loss: 0.3363 - acc: 0.9060
#   Epoch 4/10
#   60000/60000 [==============================] - 28s - loss: 0.2553 - acc: 0.9292
#   Epoch 5/10
#   60000/60000 [==============================] - 28s - loss: 0.2113 - acc: 0.9408
#   Epoch 6/10
#   60000/60000 [==============================] - 28s - loss: 0.1811 - acc: 0.9488
#   Epoch 7/10
#   60000/60000 [==============================] - 28s - loss: 0.1578 - acc: 0.9548
#   Epoch 8/10
#   60000/60000 [==============================] - 28s - loss: 0.1407 - acc: 0.9597
#   Epoch 9/10
#   60000/60000 [==============================] - 28s - loss: 0.1284 - acc: 0.9633
#   Epoch 10/10
#   60000/60000 [==============================] - 28s - loss: 0.1188 - acc: 0.9655
# Evaluate the trained model on the test data. The result is indexed below:
# element 0 is reported as the test score and element 1 as the test accuracy
# (the model is compiled with a loss plus the 'accuracy' metric).
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# Output:
#   10000/10000 [==============================] - 5s
#   ('Test score:', 0.11906883909329773)
#   ('Test accuracy:', 0.96530000000000005)