#!/usr/bin/env python
# coding: utf-8

# *[www.joyofdata.de](http://blog.joyofdata.de) - [@joyofdata](https://twitter.com/joyofdata) - [github.com/joyofdata](https://github.com/joyofdata)*

# **You will find more information here: [Neural Networks with Caffe on the GPU](http://www.joyofdata.de/blog/neural-networks-with-caffe-on-the-gpu)**

# ---
# # Training Multi-Layer Neural Network with Caffe

# In[2]:

import platform
import subprocess
import sys

# Make the pycaffe bindings importable; this must happen before `import caffe`.
sys.path.append("/home/ubuntu/caffe/python/")
import caffe

caffe.set_mode_gpu()

import lmdb
# NOTE: `sklearn.cross_validation` was removed in scikit-learn 0.20. On newer
# versions use `sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, ...)`
# together with `.split(features, labels)` instead.
from sklearn.cross_validation import StratifiedShuffleSplit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

get_ipython().run_line_magic('matplotlib', 'inline')


# # Status Quo

# In[3]:

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("OS:  {}".format(platform.platform()))
print("Python:  {}".format(sys.version.split("\n")[0]))
nvcc_output = subprocess.Popen(
    ["nvcc", "--version"], stdout=subprocess.PIPE
).communicate()[0].decode("utf-8")
# The 4th output line is reported (presumably the release line - verify
# against the installed nvcc).
print("CUDA:  {}".format(nvcc_output.split("\n")[3]))
print("LMDB:  {}".format(".".join(str(i) for i in lmdb.version())))


# # Load Data from CSV and Transform

# The CSV is assumed to be the training data from the
# ["Otto Group Product Classification Challenge"](https://www.kaggle.com/c/otto-group-product-classification-challenge)
# at Kaggle. It contains 95 columns:
#
# - [0] id (discarded)
# - [1..93] features (integer values)
# - [94] label (9 categories - `Class_1,..,Class_9`)

# In[4]:

df = pd.read_csv("train.csv", sep=",")
# Positional selection: drop the first (id) and last (label) column.
features = df.iloc[:, 1:-1].values
labels = df.iloc[:, -1].values


# In[5]:

def vec_log(x):
    """Return log(x + 1), applied element-wise to the array *x*."""
    return np.log(x + 1)


# Map a label string to a zero-based integer using its last character
# (e.g. "Class_3" -> 2).
vec_int = np.vectorize(lambda label: int(label[-1]) - 1)


# In[6]:

features = vec_log(features)
labels = vec_int(labels)


# # Stratified Split for Training and Testing

# In[7]:

sss = StratifiedShuffleSplit(labels, 1, test_size=0.02, random_state=0)
# Only one split was requested; take its (train indices, test indices) pair.
sss = list(sss)[0]
train_idx, test_idx = sss[0], sss[1]


# In[8]:

features_training = features[train_idx]
labels_training = labels[train_idx]
features_testing = features[test_idx]
labels_testing = labels[test_idx]


# # Load Data into LMDB

# In[9]:

# http://deepdish.io/2015/04/28/creating-lmdb-in-python/
def load_data_into_lmdb(lmdb_name, features, labels=None):
    """Write every row of *features* into an LMDB database as a Caffe Datum.

    Each row is stored as a Datum with ``channels = features.shape[1]`` and
    ``height = width = 1`` under the zero-padded 8-digit row index as key.

    Args:
        lmdb_name: Path of the LMDB environment to open.
        features: 2-D NumPy array (rows are cases). Its dtype must be the
            default integer dtype (stored as raw bytes in ``datum.data``) or
            the default float dtype (stored in ``datum.float_data``).
        labels: Optional sequence indexed like the rows of *features*; when
            given, ``int(labels[i])`` is stored as the Datum label.

    Raises:
        TypeError: If ``features.dtype`` is neither of the supported dtypes.
    """
    # np.dtype(int) / np.dtype(float) are what the removed `np.int` /
    # `np.float` aliases compared equal to.
    if features.dtype == np.dtype(int):
        store_as_bytes = True
    elif features.dtype == np.dtype(float):
        store_as_bytes = False
    else:
        raise TypeError("features.dtype unknown.")

    env = lmdb.open(lmdb_name, map_size=features.nbytes * 2)
    try:
        # Shape (N, C) -> (N, C, 1, 1) so each row is a C x 1 x 1 blob.
        features = features[:, :, None, None]
        n_channels = features.shape[1]

        # One write transaction for all records instead of one per record.
        with env.begin(write=True) as txn:
            for i in range(features.shape[0]):
                datum = caffe.proto.caffe_pb2.Datum()
                datum.channels = n_channels
                datum.height = 1
                datum.width = 1

                if store_as_bytes:
                    datum.data = features[i].tobytes()
                else:
                    datum.float_data.extend(features[i].flat)

                if labels is not None:
                    datum.label = int(labels[i])

                str_id = '{:08}'.format(i)
                # LMDB keys are bytes; encoding is a no-op for Python 2 str.
                txn.put(str_id.encode("ascii"), datum.SerializeToString())
    finally:
        env.close()


# In[10]:

load_data_into_lmdb("/home/ubuntu/data/train_data_lmdb", features_training, labels_training)
load_data_into_lmdb("/home/ubuntu/data/test_data_lmdb", features_testing, labels_testing)


# # Check Content of LMDB

# In[11]:

# http://research.beenfrog.com/code/2015/03/28/read-leveldb-lmdb-for-caffe-with-python.html
def get_data_for_case_from_lmdb(lmdb_name, id):
    """Read one record from an LMDB database and decode it as a Caffe Datum.

    Args:
        lmdb_name: Path of the LMDB environment to open read-only.
        id: Record key, e.g. ``"00012345"`` (the parameter name shadows the
            builtin but is kept for backward compatibility). Text keys are
            ASCII-encoded before the lookup.

    Returns:
        A ``(label, feature)`` tuple: the Datum's label and the array
        produced by ``caffe.io.datum_to_array``.

    Raises:
        KeyError: If no record is stored under *id*.
    """
    key = id if isinstance(id, bytes) else id.encode("ascii")
    datum = caffe.proto.caffe_pb2.Datum()

    lmdb_env = lmdb.open(lmdb_name, readonly=True)
    try:
        with lmdb_env.begin() as lmdb_txn:
            raw_datum = lmdb_txn.get(key)
            if raw_datum is None:
                raise KeyError(
                    "no record with id {!r} in {!r}".format(id, lmdb_name)
                )
            datum.ParseFromString(raw_datum)
    finally:
        lmdb_env.close()

    feature = caffe.io.datum_to_array(datum)
    label = datum.label
    return (label, feature)


# In[12]:

get_data_for_case_from_lmdb("/home/ubuntu/data/train_data_lmdb/", "00012345")


# # Training the Model

# In[13]:

proc = subprocess.Popen(
    ["/home/ubuntu/caffe/build/tools/caffe", "train", "--solver=config.prototxt"],
    stderr=subprocess.PIPE)
# Only stderr is captured; it is printed in the next cell.
res = proc.communicate()[1]

# http://nbviewer.ipython.org/github/BVLC/caffe/blob/master/examples/hdf5_classification.ipynb
# or
# caffe.set_mode_gpu()
# solver = caffe.get_solver("config.prototxt")
# solver.solve()


# In[14]:

print(res)


# # Applying the Model

# In[15]:

net = caffe.Net("model_prod.prototxt", "./_iter_100001.caffemodel", caffe.TEST)


# In[16]:

l, f = get_data_for_case_from_lmdb("/home/ubuntu/data/test_data_lmdb/", "00001230")
out = net.forward(**{net.inputs[0]: np.asarray([f])})

# if the index of the largest element matches the integer
# label we stored for that case - then the prediction is right
print("{} \n{}".format(np.argmax(out["prob"][0]) == l, out))
plt.bar(range(9), out["prob"][0])


# # Visualizing the Network Graph

# In[172]:

from google.protobuf import text_format
from caffe.draw import get_pydot_graph
from caffe.proto import caffe_pb2
from IPython.display import display, Image

_net = caffe_pb2.NetParameter()
# Context manager so the prototxt file handle is closed after reading.
with open("model_prod.prototxt") as proto_file:
    text_format.Merge(proto_file.read(), _net)
display(Image(get_pydot_graph(_net, "TB").create_png()))


# # Visualizing the Weights

# In[192]:

# weights connecting the input with relu1
arr = net.params["ip1"][0].data


# In[222]:

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111)
# The image must be created first: the colorbar is built from it.
cax = ax.matshow(arr, interpolation='none')
fig.colorbar(cax, orientation="horizontal")


# In[230]:

_ = plt.hist(arr.tolist(), bins=20)