In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
Using TensorFlow backend.
In [21]:
import keras
keras.__version__
Out[21]:
'2.0.5'
In [2]:
np.random.seed(42)
In [3]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
10903552/11490434 [===========================>..] - ETA: 0s 

The images are 28×28; we flatten each one into a vector of length 784.

In [4]:
X_train.shape, X_test.shape
Out[4]:
((60000, 28, 28), (10000, 28, 28))
In [5]:
X_train = X_train.reshape(X_train.shape[0], 784).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 784).astype('float32')

Scaling the data.

In [6]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
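StandardScaler standardizes each of the 784 pixel positions to zero mean and unit variance. For image data a simpler, very common alternative (not used in this notebook) is to rescale intensities into [0, 1]; a minimal sketch, assuming pixel values in [0, 255], with illustrative variable names:

X_train_01 = X_train / 255.0  # map [0, 255] -> [0, 1]
X_test_01 = X_test / 255.0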
In [7]:
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
In [8]:
y_train[:3, :]
Out[8]:
array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.]])
In [9]:
model = Sequential()
In [12]:
model.add(Dense(800, input_dim=784, kernel_initializer='normal', activation='relu'))
In [13]:
model.add(Dense(10, kernel_initializer='normal', activation='softmax'))
In [14]:
model.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
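The string 'SGD' selects stochastic gradient descent with its default settings. To control the learning rate, you can pass an optimizer instance instead; a sketch, where the lr value is illustrative:

from keras.optimizers import SGD
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=0.01),  # explicit learning rate
              metrics=['accuracy'])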
In [15]:
print(model.summary())
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 800)               628000    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                8010      
_________________________________________________________________
dense_3 (Dense)              (None, 800)               8800      
_________________________________________________________________
dense_4 (Dense)              (None, 10)                8010      
=================================================================
Total params: 652,820
Trainable params: 652,820
Non-trainable params: 0
_________________________________________________________________
None

Note that the In[] counters jump from 9 to 12: the model.add cells were apparently executed twice, so the network actually contains four Dense layers (dense_1 through dense_4). This is why the summary above lists 652,820 parameters instead of the 636,010 a two-layer 784→800→10 network would have.

Visualizing the model (requires pip install pydot-ng)

In [18]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))
Out[18]:
[SVG model graph: dense_1_input: InputLayer (None, 784) → dense_1: Dense (None, 800) → dense_2: Dense (None, 10) → dense_3: Dense (None, 800) → dense_4: Dense (None, 10)]

Callbacks

Early stopping

A few words are also in order about an important Keras feature: callbacks. A lot of useful functionality is implemented through them. For example, if you train a network for a long time, you need a way to stop once the error on your dataset has stopped decreasing; this is exactly what early stopping does.

In [19]:
from keras.callbacks import EarlyStopping  
early_stopping = EarlyStopping(monitor='loss')
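With monitor='loss' and the default patience of 0, training stops as soon as the training loss fails to improve for a single epoch (which is what happens at epoch 56 below). A more typical configuration, sketched here with illustrative parameter values, watches the validation loss and tolerates a few stagnant epochs:

early_stopping = EarlyStopping(monitor='val_loss', patience=5)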

Tensorboard

Another callback writes logs in a format suitable for Tensorboard, a dedicated utility for processing and visualizing Tensorflow logs. Once training has finished (or even while it is still running!), you can launch Tensorboard, pointing it at the absolute path of the log directory: tensorboard --logdir=/path/to/logs

In [20]:
from keras.callbacks import TensorBoard  
tensorboard = TensorBoard(log_dir='../logs/', write_graph=True)
In [23]:
%%time
model.fit(X_train_scaled, y_train, batch_size=200, epochs=100, 
          validation_split=0.2, callbacks=[early_stopping, tensorboard], verbose=2);
Train on 48000 samples, validate on 12000 samples
Epoch 1/100
4s - loss: 0.5569 - acc: 0.8703 - val_loss: 0.6286 - val_acc: 0.8392
Epoch 2/100
4s - loss: 0.5331 - acc: 0.8825 - val_loss: 0.6521 - val_acc: 0.8056
Epoch 3/100
4s - loss: 0.5124 - acc: 0.8879 - val_loss: 0.5859 - val_acc: 0.8623
Epoch 4/100
4s - loss: 0.4892 - acc: 0.8970 - val_loss: 0.5624 - val_acc: 0.8705
Epoch 5/100
4s - loss: 0.4695 - acc: 0.9018 - val_loss: 0.5454 - val_acc: 0.8783
Epoch 6/100
4s - loss: 0.4479 - acc: 0.9068 - val_loss: 0.5229 - val_acc: 0.8814
Epoch 7/100
4s - loss: 0.4141 - acc: 0.9184 - val_loss: 0.5296 - val_acc: 0.8748
Epoch 8/100
4s - loss: 0.3972 - acc: 0.9211 - val_loss: 0.5017 - val_acc: 0.8826
Epoch 9/100
4s - loss: 0.3762 - acc: 0.9268 - val_loss: 0.4880 - val_acc: 0.8828
Epoch 10/100
4s - loss: 0.3564 - acc: 0.9335 - val_loss: 0.4799 - val_acc: 0.8811
Epoch 11/100
4s - loss: 0.3405 - acc: 0.9365 - val_loss: 0.4492 - val_acc: 0.8964
Epoch 12/100
4s - loss: 0.3232 - acc: 0.9414 - val_loss: 0.4373 - val_acc: 0.8974
Epoch 13/100
4s - loss: 0.3100 - acc: 0.9435 - val_loss: 0.4296 - val_acc: 0.8984
Epoch 14/100
4s - loss: 0.3022 - acc: 0.9439 - val_loss: 0.4260 - val_acc: 0.8979
Epoch 15/100
4s - loss: 0.2862 - acc: 0.9485 - val_loss: 0.4138 - val_acc: 0.9018
Epoch 16/100
4s - loss: 0.2721 - acc: 0.9518 - val_loss: 0.4070 - val_acc: 0.9023
Epoch 17/100
4s - loss: 0.2571 - acc: 0.9555 - val_loss: 0.4056 - val_acc: 0.9024
Epoch 18/100
4s - loss: 0.2493 - acc: 0.9563 - val_loss: 0.4482 - val_acc: 0.8796
Epoch 19/100
4s - loss: 0.2418 - acc: 0.9570 - val_loss: 0.3796 - val_acc: 0.9093
Epoch 20/100
5s - loss: 0.2295 - acc: 0.9616 - val_loss: 0.3772 - val_acc: 0.9117
Epoch 21/100
4s - loss: 0.2222 - acc: 0.9615 - val_loss: 0.4037 - val_acc: 0.8967
Epoch 22/100
4s - loss: 0.2116 - acc: 0.9644 - val_loss: 0.3631 - val_acc: 0.9149
Epoch 23/100
4s - loss: 0.2025 - acc: 0.9651 - val_loss: 0.3645 - val_acc: 0.9127
Epoch 24/100
4s - loss: 0.1950 - acc: 0.9668 - val_loss: 0.3583 - val_acc: 0.9146
Epoch 25/100
4s - loss: 0.1936 - acc: 0.9666 - val_loss: 0.3562 - val_acc: 0.9158
Epoch 26/100
4s - loss: 0.1830 - acc: 0.9693 - val_loss: 0.3564 - val_acc: 0.9127
Epoch 27/100
4s - loss: 0.1747 - acc: 0.9707 - val_loss: 0.3471 - val_acc: 0.9167
Epoch 28/100
4s - loss: 0.1705 - acc: 0.9712 - val_loss: 0.3482 - val_acc: 0.9179
Epoch 29/100
4s - loss: 0.1626 - acc: 0.9732 - val_loss: 0.3422 - val_acc: 0.9197
Epoch 30/100
4s - loss: 0.1581 - acc: 0.9746 - val_loss: 0.3423 - val_acc: 0.9192
Epoch 31/100
4s - loss: 0.1523 - acc: 0.9753 - val_loss: 0.3464 - val_acc: 0.9158
Epoch 32/100
4s - loss: 0.1467 - acc: 0.9762 - val_loss: 0.3396 - val_acc: 0.9194
Epoch 33/100
4s - loss: 0.1430 - acc: 0.9763 - val_loss: 0.3377 - val_acc: 0.9190
Epoch 34/100
4s - loss: 0.1371 - acc: 0.9783 - val_loss: 0.3325 - val_acc: 0.9211
Epoch 35/100
4s - loss: 0.1348 - acc: 0.9787 - val_loss: 0.3368 - val_acc: 0.9192
Epoch 36/100
4s - loss: 0.1287 - acc: 0.9800 - val_loss: 0.3310 - val_acc: 0.9213
Epoch 37/100
5s - loss: 0.1279 - acc: 0.9793 - val_loss: 0.3345 - val_acc: 0.9198
Epoch 38/100
5s - loss: 0.1243 - acc: 0.9800 - val_loss: 0.3408 - val_acc: 0.9179
Epoch 39/100
4s - loss: 0.1193 - acc: 0.9814 - val_loss: 0.3307 - val_acc: 0.9202
Epoch 40/100
4s - loss: 0.1152 - acc: 0.9823 - val_loss: 0.3295 - val_acc: 0.9209
Epoch 41/100
5s - loss: 0.1143 - acc: 0.9823 - val_loss: 0.3320 - val_acc: 0.9186
Epoch 42/100
5s - loss: 0.1097 - acc: 0.9831 - val_loss: 0.3315 - val_acc: 0.9213
Epoch 43/100
5s - loss: 0.1085 - acc: 0.9833 - val_loss: 0.3339 - val_acc: 0.9207
Epoch 44/100
5s - loss: 0.1060 - acc: 0.9836 - val_loss: 0.3398 - val_acc: 0.9178
Epoch 45/100
4s - loss: 0.1031 - acc: 0.9846 - val_loss: 0.3299 - val_acc: 0.9214
Epoch 46/100
4s - loss: 0.1003 - acc: 0.9850 - val_loss: 0.3347 - val_acc: 0.9179
Epoch 47/100
5s - loss: 0.0979 - acc: 0.9856 - val_loss: 0.3342 - val_acc: 0.9204
Epoch 48/100
5s - loss: 0.0970 - acc: 0.9858 - val_loss: 0.3282 - val_acc: 0.9215
Epoch 49/100
5s - loss: 0.0946 - acc: 0.9860 - val_loss: 0.3316 - val_acc: 0.9200
Epoch 50/100
4s - loss: 0.0924 - acc: 0.9861 - val_loss: 0.3309 - val_acc: 0.9210
Epoch 51/100
4s - loss: 0.0898 - acc: 0.9875 - val_loss: 0.3294 - val_acc: 0.9222
Epoch 52/100
5s - loss: 0.0878 - acc: 0.9874 - val_loss: 0.3385 - val_acc: 0.9185
Epoch 53/100
4s - loss: 0.0859 - acc: 0.9879 - val_loss: 0.3258 - val_acc: 0.9207
Epoch 54/100
4s - loss: 0.0853 - acc: 0.9878 - val_loss: 0.3314 - val_acc: 0.9211
Epoch 55/100
5s - loss: 0.0824 - acc: 0.9885 - val_loss: 0.3322 - val_acc: 0.9214
Epoch 56/100
5s - loss: 0.0826 - acc: 0.9883 - val_loss: 0.3485 - val_acc: 0.9147
CPU times: user 23min 40s, sys: 10min 57s, total: 34min 38s
Wall time: 4min 38s
Out[23]:
<keras.callbacks.History at 0x7f3ae8f3e8d0>

Tensorboard makes it convenient to track the training process of a neural network.
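The return value of model.fit (the History object shown in Out[23] above) offers a lightweight alternative: its history attribute is a dict of per-epoch metrics. A sketch, assuming the fit call above is assigned to a variable:

history = model.fit(X_train_scaled, y_train, batch_size=200, epochs=100,
                    validation_split=0.2,
                    callbacks=[early_stopping, tensorboard], verbose=2)
# keys in this version of Keras: 'loss', 'acc', 'val_loss', 'val_acc'
print(history.history['val_acc'][-1])  # validation accuracy of the last epoch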

Evaluating the trained network on the test data

In [24]:
scores = model.evaluate(X_test_scaled, y_test, verbose=0)
print("Доля верных ответов на тестовых данных: %.2f%%" % (scores[1]*100))
Доля верных ответов на тестовых данных: 91.04%
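To get predicted labels rather than an aggregate score, run the model on the scaled test images and take the argmax over the 10 class probabilities; a minimal sketch:

predictions = model.predict(X_test_scaled)         # (10000, 10) class probabilities
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)           # y_test is one-hot encoded
print(predicted_classes[:10], true_classes[:10])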

Saving the network

The architecture goes into a JSON file

In [25]:
model_json = model.to_json()
with open("mnist_model.json", "w") as json_file:
    json_file.write(model_json)
In [26]:
import pprint
import json
In [27]:
with open("mnist_model.json", "r") as json_file:
    pprint.pprint(json.loads(json_file.read()))
{u'backend': u'tensorflow',
 u'class_name': u'Sequential',
 u'config': [{u'class_name': u'Dense',
              u'config': {u'activation': u'relu',
                          u'activity_regularizer': None,
                          u'batch_input_shape': [None, 784],
                          u'bias_constraint': None,
                          u'bias_initializer': {u'class_name': u'Zeros',
                                                u'config': {}},
                          u'bias_regularizer': None,
                          u'dtype': u'float32',
                          u'kernel_constraint': None,
                          u'kernel_initializer': {u'class_name': u'RandomNormal',
                                                  u'config': {u'mean': 0.0,
                                                              u'seed': None,
                                                              u'stddev': 0.05}},
                          u'kernel_regularizer': None,
                          u'name': u'dense_1',
                          u'trainable': True,
                          u'units': 800,
                          u'use_bias': True}},
             {u'class_name': u'Dense',
              u'config': {u'activation': u'softmax',
                          u'activity_regularizer': None,
                          u'bias_constraint': None,
                          u'bias_initializer': {u'class_name': u'Zeros',
                                                u'config': {}},
                          u'bias_regularizer': None,
                          u'kernel_constraint': None,
                          u'kernel_initializer': {u'class_name': u'RandomNormal',
                                                  u'config': {u'mean': 0.0,
                                                              u'seed': None,
                                                              u'stddev': 0.05}},
                          u'kernel_regularizer': None,
                          u'name': u'dense_2',
                          u'trainable': True,
                          u'units': 10,
                          u'use_bias': True}},
             {u'class_name': u'Dense',
              u'config': {u'activation': u'relu',
                          u'activity_regularizer': None,
                          u'batch_input_shape': [None, 784],
                          u'bias_constraint': None,
                          u'bias_initializer': {u'class_name': u'Zeros',
                                                u'config': {}},
                          u'bias_regularizer': None,
                          u'dtype': u'float32',
                          u'kernel_constraint': None,
                          u'kernel_initializer': {u'class_name': u'RandomNormal',
                                                  u'config': {u'mean': 0.0,
                                                              u'seed': None,
                                                              u'stddev': 0.05}},
                          u'kernel_regularizer': None,
                          u'name': u'dense_3',
                          u'trainable': True,
                          u'units': 800,
                          u'use_bias': True}},
             {u'class_name': u'Dense',
              u'config': {u'activation': u'softmax',
                          u'activity_regularizer': None,
                          u'bias_constraint': None,
                          u'bias_initializer': {u'class_name': u'Zeros',
                                                u'config': {}},
                          u'bias_regularizer': None,
                          u'kernel_constraint': None,
                          u'kernel_initializer': {u'class_name': u'RandomNormal',
                                                  u'config': {u'mean': 0.0,
                                                              u'seed': None,
                                                              u'stddev': 0.05}},
                          u'kernel_regularizer': None,
                          u'name': u'dense_4',
                          u'trainable': True,
                          u'units': 10,
                          u'use_bias': True}}],
 u'keras_version': u'2.0.5'}

The weights go into a binary HDF5 file

In [28]:
model.save_weights("mnist_model.h5")
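As an alternative to the two-file approach, Keras can also store the architecture, the weights, and the optimizer state together in a single HDF5 file with model.save, restorable via keras.models.load_model; a sketch, where the filename is illustrative:

from keras.models import load_model

model.save("mnist_model_full.h5")                   # architecture + weights + optimizer state
restored_model = load_model("mnist_model_full.h5")  # ready to use, no separate compile needed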

Now the network can be restored and used

In [29]:
from keras.models import model_from_json

with open("mnist_model.json") as json_file:
    loaded_model_json = json_file.read()

# Create the model from the loaded description
loaded_model = model_from_json(loaded_model_json)
# Load the weights into the model
loaded_model.load_weights("mnist_model.h5")

Before the loaded model can be used, it must be compiled.

In [30]:
loaded_model.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
In [31]:
scores = loaded_model.evaluate(X_test_scaled, y_test, verbose=0)
print("Accuracy on the test data: %.2f%%" % (scores[1]*100))
Accuracy on the test data: 91.04%