%pylab inline %cd .. import os, shutil dir_o = 'examples/tmp.libsvm_data' ! mkdir -p {dir_o} from sklearn.datasets import load_digits digits = load_digits() print digits.images.shape, digits.target.shape imshow(digits.images[0], cmap=gray(), interpolation='none') def split2(arr): div = arr.shape[0] / 2 return arr[:div], arr[div:] data2 = split2(digits.data) target2 = split2(digits.target) print [d.shape[0] for d in data2] from sklearn.linear_model import LogisticRegression lr = LogisticRegression().fit(data2[0], target2[0]) from sklearn.metrics import accuracy_score print 'Test Accuracy:', accuracy_score(target2[1], lr.predict(data2[1])) def libsvm_format_generator(data, target): from StringIO import StringIO for datum, tar in zip(data, target): sio = StringIO() print >> sio, tar, for i, v in enumerate(datum): if v == 0: continue print >> sio, '%d:%g' % (i, v), yield sio.getvalue() print libsvm_format_generator(data2[0], target2[0]).next() with open(os.path.join(dir_o, 'input_train.txt'), 'w') as f: for l in libsvm_format_generator(data2[0], target2[0]): print >> f, l with open(os.path.join(dir_o, 'input_test.txt'), 'w') as f: for l in libsvm_format_generator(data2[1], target2[1]): print >> f, l %%writefile {dir_o}/linear_train_test.prototxt name: "digits_libsvm" layers { name: "digits" type: LIBSVM_DATA top: "data" top: "label" libsvm_data_param { source: "examples/tmp.libsvm_data/input_train.txt" batch_size: 100 channels: 64 shuffle: true } transform_param { scale: 0.00392156862745098 } include: { phase: TRAIN } } layers { name: "digits" type: LIBSVM_DATA top: "data" top: "label" libsvm_data_param { source: "examples/tmp.libsvm_data/input_test.txt" batch_size: 899 channels: 64 shuffle: false } transform_param { scale: 0.00392156862745098 } include: { phase: TEST } } layers { name: "ip1" type: INNER_PRODUCT bottom: "data" top: "ip1" blobs_lr: 1 blobs_lr: 2 inner_product_param { num_output: 64 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layers { name: "accuracy" type: ACCURACY bottom: "ip1" bottom: "label" top: "accuracy" include: { phase: TEST } } layers { name: "loss" type: SOFTMAX_LOSS bottom: "ip1" bottom: "label" top: "loss" } %%writefile {dir_o}/linear_solver.prototxt net: "examples/tmp.libsvm_data/linear_train_test.prototxt" test_iter: 1 test_interval: 1000 base_lr: 1.0 momentum: 0.9 #weight_decay: 0.00001 lr_policy: "fixed" display: 1000 max_iter: 4000 snapshot: 4000 snapshot_prefix: "examples/tmp.libsvm_data/linear" solver_mode: CPU random_seed: 0 # Just for reproducible purpose !build/tools/caffe train --solver={dir_o}/linear_solver.prototxt 2>&1 | egrep -A1 "Iteration .*, Test" %%writefile {dir_o}/mlp_train_test.prototxt name: "digits_libsvm" layers { name: "digits" type: LIBSVM_DATA top: "data" top: "label" libsvm_data_param { source: "examples/tmp.libsvm_data/input_train.txt" batch_size: 100 channels: 64 shuffle: true } transform_param { scale: 0.00392156862745098 } include: { phase: TRAIN } } layers { name: "digits" type: LIBSVM_DATA top: "data" top: "label" libsvm_data_param { source: "examples/tmp.libsvm_data/input_test.txt" batch_size: 899 channels: 64 shuffle: false } transform_param { scale: 0.00392156862745098 } include: { phase: TEST } } layers { name: "ip1" type: INNER_PRODUCT bottom: "data" top: "ip1" blobs_lr: 1 blobs_lr: 2 inner_product_param { num_output: 64 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layers { name: "relu1" type: RELU bottom: "ip1" top: "ip1" } layers { name: "ip2" type: INNER_PRODUCT bottom: "ip1" top: "ip2" blobs_lr: 1 blobs_lr: 2 inner_product_param { num_output: 10 weight_filler { type: "gaussian" std: 0.1 } bias_filler { type: "constant" } } } layers { name: "accuracy" type: ACCURACY bottom: "ip2" bottom: "label" top: "accuracy" include: { phase: TEST } } layers { name: "loss" type: SOFTMAX_LOSS bottom: "ip2" bottom: "label" top: "loss" } %%writefile {dir_o}/mlp_solver.prototxt net: "examples/tmp.libsvm_data/mlp_train_test.prototxt" test_iter: 1 test_interval: 1000 base_lr: 0.1 momentum: 0.9 weight_decay: 0.00001 lr_policy: "fixed" display: 1000 max_iter: 4000 snapshot: 4000 snapshot_prefix: "examples/tmp.libsvm_data/mlp" solver_mode: CPU random_seed: 0 # Just for reproducible purpose !build/tools/caffe train --solver={dir_o}/mlp_solver.prototxt 2>&1 | egrep -A1 "Iteration .*, Test" !rm -rf {dir_o}