#!/usr/bin/env python
# coding: utf-8

# # Program description
# Name: parameter search with the sklearn wrapper
#
# Date: November 17, 2016
#
# Description: build a simple convolutional model and use sklearn's GridSearchCV to find the best model.
#
# Dataset: MNIST

# ## 1. Load the Keras modules

# In[1]:

from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.grid_search import GridSearchCV


# ## 2. Initialize variables

# In[2]:

nb_classes = 10
# input image dimensions
img_rows, img_cols = 28, 28


# ## 3. Prepare the data

# In[3]:

# load training data and do basic data normalization
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255


# ### Convert the class labels

# In[4]:

# convert class vectors to binary class matrices
y_train = np_utils.to_categorical(y_train, nb_classes)
y_test = np_utils.to_categorical(y_test, nb_classes)


# ## 4. Build the model
# ### Using Sequential()
# Build a model made of two convolutional layers followed by dense layers; the number of dense layers is determined by a parameter.
#
# dense_layer_sizes: list of dense layer sizes, one number per layer.
#
# nb_filters: number of convolutional filters in each convolutional layer
#
# nb_conv: convolutional kernel size
#
# nb_pool: size of the pooling area for max pooling

# In[5]:

def make_model(dense_layer_sizes, nb_filters, nb_conv, nb_pool):
    '''Creates a model comprised of 2 convolutional layers followed by dense layers

    dense_layer_sizes: List of layer sizes. This list has one number for each layer
    nb_filters: Number of convolutional filters in each convolutional layer
    nb_conv: Convolutional kernel size
    nb_pool: Size of pooling area for max pooling
    '''
    model = Sequential()

    model.add(Convolution2D(nb_filters, nb_conv, nb_conv,
                            border_mode='valid',
                            input_shape=(img_rows, img_cols, 1)))
    model.add(Activation('relu'))
    model.add(Convolution2D(nb_filters, nb_conv, nb_conv))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    for layer_size in dense_layer_sizes:
        model.add(Dense(layer_size))
        model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adadelta',
                  metrics=['accuracy'])

    return model


# ## 5. The sklearn interface
#
# KerasClassifier() implements the sklearn classifier interface.
#
# `keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`
#
# build_fn: a callable function or class instance
#
# sk_params: model parameters and fitting parameters

# In[6]:

dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
my_classifier = KerasClassifier(make_model, batch_size=32)


# ### GridSearchCV in sklearn
# Description: exhaustive search over specified parameter values for an estimator.

# In[7]:

validator = GridSearchCV(my_classifier,
                         param_grid={'dense_layer_sizes': dense_size_candidates,
                                     # nb_epoch is available for tuning even when not
                                     # an argument to the model-building function
                                     'nb_epoch': [3, 6],
                                     'nb_filters': [8],
                                     'nb_conv': [3],
                                     'nb_pool': [2]},
                         scoring='log_loss',
                         n_jobs=1)


# ### Fit the grid search

# In[8]:

validator.fit(X_train, y_train)


# ### Print the parameters of the best model

# In[9]:

print('The parameters of the best model are: ')
print(validator.best_params_)


# ### Retrieve the model
#
# validator.best_estimator_ returns the sklearn-wrapped version of the best model
#
# validator.best_estimator_.model returns the (unwrapped) Keras model

# In[12]:

best_model = validator.best_estimator_.model
metric_names = best_model.metrics_names
metric_values = best_model.evaluate(X_test, y_test)

print('\n')
for metric, value in zip(metric_names, metric_values):
    print(metric, ': ', value)
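

# ### Saving and reusing the best model
#
# Not part of the original notebook: a minimal sketch of how the best Keras model found by
# the grid search could be saved, reloaded, and used for prediction. It assumes the cells
# above have been run; the file name 'mnist_best_cnn.h5' is hypothetical.

# In[13]:

from keras.models import load_model

# Persist the unwrapped Keras model (architecture, weights and optimizer state) to disk.
best_model.save('mnist_best_cnn.h5')  # hypothetical file name

# Reload it and confirm it reproduces the test-set metrics computed above.
reloaded_model = load_model('mnist_best_cnn.h5')
print(reloaded_model.evaluate(X_test, y_test, verbose=0))

# Predict class labels for a few test images; y_test was one-hot encoded above,
# so take the argmax of both the predictions and the targets to compare them.
predictions = np.argmax(reloaded_model.predict(X_test[:10]), axis=1)
print('predicted:', predictions)
print('actual:   ', np.argmax(y_test[:10], axis=1))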