# Notebook setup: pull the pylab namespace in and render plots inline.
%pylab inline
# Step up one directory. The relative paths used below (examples/...,
# build/tools/caffe) presumably resolve from the Caffe root -- confirm where
# the notebook is launched from.
%cd ..
import os, shutil
# Scratch directory that receives the generated data files, the prototxt
# definitions and the solver snapshots.
dir_o = 'examples/tmp.libsvm_data'
# Create the scratch directory; -p makes this a no-op when it already exists.
! mkdir -p {dir_o}

from sklearn.datasets import load_digits
digits = load_digits()
print digits.images.shape, digits.target.shape

imshow(digits.images[0], cmap=gray(), interpolation='none')

def split2(arr):
    """Split ``arr`` into two halves along its first axis.

    Args:
        arr: Object exposing ``shape`` and slicing (e.g. a NumPy array).

    Returns:
        A ``(first, second)`` tuple.  When the number of rows is odd, the
        extra row ends up in the second half.
    """
    # Floor division keeps the split index an integer on both Python 2 and
    # Python 3; plain `/` yields a float on Python 3, which is not a valid
    # slice bound.
    div = arr.shape[0] // 2
    return arr[:div], arr[div:]
data2 = split2(digits.data)
target2 = split2(digits.target)
print [d.shape[0] for d in data2]

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression().fit(data2[0], target2[0])
from sklearn.metrics import accuracy_score
print 'Test Accuracy:', accuracy_score(target2[1], lr.predict(data2[1]))

def libsvm_format_generator(data, target):
    """Yield one LIBSVM-style text line per sample.

    Each yielded string is the label followed by space-separated
    ``index:value`` pairs for the non-zero features, without a trailing
    newline, e.g. ``"7 1:1.5 3:3"``.

    Args:
        data: Iterable of feature sequences, one per sample.
        target: Iterable of labels, paired with ``data`` by position.

    Yields:
        str: The encoded line for each ``(datum, label)`` pair.
    """
    # NOTE(review): feature indices are 0-based (the enumerate position);
    # the reference LIBSVM tools use 1-based indices -- confirm which one
    # the consuming LIBSVM_DATA layer expects.
    for datum, tar in zip(data, target):
        fields = [str(tar)]
        # Sparse encoding: zero-valued features are left out.
        fields.extend(
            '%d:%g' % (i, v) for i, v in enumerate(datum) if v != 0)
        yield ' '.join(fields)

print libsvm_format_generator(data2[0], target2[0]).next()

with open(os.path.join(dir_o, 'input_train.txt'), 'w') as f:
    for l in libsvm_format_generator(data2[0], target2[0]):
        print >> f, l
with open(os.path.join(dir_o, 'input_test.txt'), 'w') as f:
    for l in libsvm_format_generator(data2[1], target2[1]):
        print >> f, l

%%writefile {dir_o}/linear_train_test.prototxt
# Linear softmax classifier trained on the LIBSVM-format files written by
# the notebook: data -> ip1 -> softmax loss (plus accuracy at test time).
name: "digits_libsvm"
# Training input: shuffled mini-batches of 100 samples, 64 features each.
layers {
  name: "digits"
  type: LIBSVM_DATA
  top: "data"
  top: "label"
  libsvm_data_param {
    source: "examples/tmp.libsvm_data/input_train.txt"
    batch_size: 100
    channels: 64
    shuffle: true
  }
  transform_param {
    # 0.00392156862745098 == 1/255.
    # NOTE(review): the digits features presumably span 0..16 rather than
    # 0..255 -- confirm this scale is the intended one.
    scale: 0.00392156862745098
  }
  include: { phase: TRAIN }
}
# Test input: unshuffled. batch_size 899 is presumably the whole test split,
# so that a single test iteration covers it -- confirm against the data.
layers {
  name: "digits"
  type: LIBSVM_DATA
  top: "data"
  top: "label"
  libsvm_data_param {
    source: "examples/tmp.libsvm_data/input_test.txt"
    batch_size: 899
    channels: 64
    shuffle: false
  }
  transform_param {
    scale: 0.00392156862745098
  }
  include: { phase: TEST }
}
# The only learned layer; its outputs are the class scores fed to the loss.
layers {
  name: "ip1"
  type: INNER_PRODUCT
  bottom: "data"
  top: "ip1"
  # Learning-rate multipliers for the layer's two blobs (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  inner_product_param {
    # One score per class. The labels are assumed to be the ten digit
    # classes 0-9 (scikit-learn digits) -- confirm. A wider output would
    # only add classes that never occur in the data.
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Classification accuracy, evaluated in the TEST phase only.
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "ip1"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}
# Softmax loss over the ip1 scores; present in both phases.
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "ip1"
  bottom: "label"
  top: "loss"
}

%%writefile {dir_o}/linear_solver.prototxt
# Solver settings for the linear model defined in linear_train_test.prototxt.
net: "examples/tmp.libsvm_data/linear_train_test.prototxt"
# Run one test batch every 1000 training iterations.
test_iter: 1
test_interval: 1000
# Constant learning rate ("fixed" policy) with momentum.
base_lr: 1.0
momentum: 0.9
#weight_decay: 0.00001
lr_policy: "fixed"
# Log every 1000 iterations; stop and snapshot at iteration 4000.
display: 1000
max_iter: 4000
snapshot: 4000
snapshot_prefix: "examples/tmp.libsvm_data/linear"
solver_mode: CPU
random_seed: 0 # Fixed seed so that runs are reproducible

# Train the linear model and keep only the test-report lines (each with the
# line that follows it). `grep -E` replaces the obsolescent `egrep` alias,
# which recent GNU grep releases warn about on every call.
!build/tools/caffe train --solver={dir_o}/linear_solver.prototxt 2>&1 | grep -E -A1 "Iteration .*, Test"

%%writefile {dir_o}/mlp_train_test.prototxt
# Two-layer perceptron on the same LIBSVM-format inputs:
# data -> ip1 (64 outputs) -> ReLU -> ip2 (10 outputs) -> softmax loss,
# plus accuracy at test time.
name: "digits_libsvm"
# Training input: shuffled mini-batches of 100 samples, 64 features each.
layers {
  name: "digits"
  type: LIBSVM_DATA
  top: "data"
  top: "label"
  libsvm_data_param {
    source: "examples/tmp.libsvm_data/input_train.txt"
    batch_size: 100
    channels: 64
    shuffle: true
  }
  transform_param {
    # 0.00392156862745098 == 1/255.
    # NOTE(review): the digits features presumably span 0..16 rather than
    # 0..255 -- confirm this scale is the intended one.
    scale: 0.00392156862745098
  }
  include: { phase: TRAIN }
}
# Test input: unshuffled. batch_size 899 is presumably the whole test split,
# so that a single test iteration covers it -- confirm against the data.
layers {
  name: "digits"
  type: LIBSVM_DATA
  top: "data"
  top: "label"
  libsvm_data_param {
    source: "examples/tmp.libsvm_data/input_test.txt"
    batch_size: 899
    channels: 64
    shuffle: false
  }
  transform_param {
    scale: 0.00392156862745098
  }
  include: { phase: TEST }
}
# Hidden layer with 64 outputs.
layers {
  name: "ip1"
  type: INNER_PRODUCT
  bottom: "data"
  top: "ip1"
  # Learning-rate multipliers for the layer's two blobs (weights, then bias).
  blobs_lr: 1
  blobs_lr: 2
  inner_product_param {
    num_output: 64
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
# ReLU non-linearity; bottom and top are the same blob name ("ip1"), so the
# result is written back over the layer's input.
layers {
  name: "relu1"
  type: RELU
  bottom: "ip1"
  top: "ip1"
}
# Output layer with 10 scores, consumed by the accuracy and loss layers.
layers {
  name: "ip2"
  type: INNER_PRODUCT
  bottom: "ip1"
  top: "ip2"
  blobs_lr: 1
  blobs_lr: 2
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gaussian"
      std: 0.1
    }
    bias_filler {
      type: "constant"
    }
  }
}
# Classification accuracy, evaluated in the TEST phase only.
layers {
  name: "accuracy"
  type: ACCURACY
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include: { phase: TEST }
}
# Softmax loss over the ip2 scores; present in both phases.
layers {
  name: "loss"
  type: SOFTMAX_LOSS
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}

%%writefile {dir_o}/mlp_solver.prototxt
# Solver settings for the MLP defined in mlp_train_test.prototxt.
net: "examples/tmp.libsvm_data/mlp_train_test.prototxt"
# Run one test batch every 1000 training iterations.
test_iter: 1
test_interval: 1000
# Constant learning rate ("fixed" policy) with momentum and weight decay.
base_lr: 0.1
momentum: 0.9
weight_decay: 0.00001
lr_policy: "fixed"
# Log every 1000 iterations; stop and snapshot at iteration 4000.
display: 1000
max_iter: 4000
snapshot: 4000
snapshot_prefix: "examples/tmp.libsvm_data/mlp"
solver_mode: CPU
random_seed: 0 # Fixed seed so that runs are reproducible

# Train the MLP and keep only the test-report lines (each with the line that
# follows it). `grep -E` replaces the obsolescent `egrep` alias, which recent
# GNU grep releases warn about on every call.
!build/tools/caffe train --solver={dir_o}/mlp_solver.prototxt 2>&1 | grep -E -A1 "Iteration .*, Test"

# Clean up: delete the scratch directory together with everything written
# into it (input files, prototxt definitions, solver snapshots).
!rm -rf {dir_o}