%pylab inline import numpy as np import scipy.io dataset = scipy.io.loadmat('../../../data/ocr/ocr_taskar.mat') # patterns for training p_tr = dataset['patterns_train'] # patterns for testing p_ts = dataset['patterns_test'] # labels for training l_tr = dataset['labels_train'] # labels for testing l_ts = dataset['labels_test'] # feature dimension n_dims = p_tr[0,0].shape[0] # number of states n_stats = 26 # number of training samples n_tr_samples = p_tr.shape[1] # number of testing samples n_ts_samples = p_ts.shape[1] import matplotlib.pyplot as plt def show_word(patterns, index): """show a word with padding""" plt.rc('image', cmap='binary') letters = patterns[0,index][:128,:] n_letters = letters.shape[1] for l in xrange(n_letters): lett = np.transpose(np.reshape(letters[:,l], (8,16))) lett = np.hstack((np.zeros((16,1)), lett, np.zeros((16,1)))) lett = np.vstack((np.zeros((1,10)), lett, np.zeros((1,10)))) subplot(1,n_letters,l) imshow(lett) plt.xticks(()) plt.yticks(()) plt.tight_layout() show_word(p_tr, 174) show_word(p_tr, 471) show_word(p_tr, 57) from modshogun import TableFactorType # unary, type_id = 0 cards_u = np.array([n_stats], np.int32) w_gt_u = np.zeros(n_stats*n_dims) fac_type_u = TableFactorType(0, cards_u, w_gt_u) # pairwise, type_id = 1 cards = np.array([n_stats,n_stats], np.int32) w_gt = np.zeros(n_stats*n_stats) fac_type = TableFactorType(1, cards, w_gt) # first bias, type_id = 2 cards_s = np.array([n_stats], np.int32) w_gt_s = np.zeros(n_stats) fac_type_s = TableFactorType(2, cards_s, w_gt_s) # last bias, type_id = 3 cards_t = np.array([n_stats], np.int32) w_gt_t = np.zeros(n_stats) fac_type_t = TableFactorType(3, cards_t, w_gt_t) # all initial parameters w_all = [w_gt_u,w_gt,w_gt_s,w_gt_t] # all factor types ftype_all = [fac_type_u,fac_type,fac_type_s,fac_type_t] def prepare_data(x, y, ftype, num_samples): """prepare FactorGraphFeatures and FactorGraphLabels """ from modshogun import Factor, TableFactorType, FactorGraph from modshogun import FactorGraphObservation, FactorGraphLabels, FactorGraphFeatures samples = FactorGraphFeatures(num_samples) labels = FactorGraphLabels(num_samples) for i in xrange(num_samples): n_vars = x[0,i].shape[1] data = x[0,i].astype(np.float64) vc = np.array([n_stats]*n_vars, np.int32) fg = FactorGraph(vc) # add unary factors for v in xrange(n_vars): datau = data[:,v] vindu = np.array([v], np.int32) facu = Factor(ftype[0], vindu, datau) fg.add_factor(facu) # add pairwise factors for e in xrange(n_vars-1): datap = np.array([1.0]) vindp = np.array([e,e+1], np.int32) facp = Factor(ftype[1], vindp, datap) fg.add_factor(facp) # add bias factor to first letter datas = np.array([1.0]) vinds = np.array([0], np.int32) facs = Factor(ftype[2], vinds, datas) fg.add_factor(facs) # add bias factor to last letter datat = np.array([1.0]) vindt = np.array([n_vars-1], np.int32) fact = Factor(ftype[3], vindt, datat) fg.add_factor(fact) # add factor graph samples.add_sample(fg) # add corresponding label states_gt = y[0,i].astype(np.int32) states_gt = states_gt[0,:]; # mat to vector loss_weights = np.array([1.0/n_vars]*n_vars) fg_obs = FactorGraphObservation(states_gt, loss_weights) labels.add_label(fg_obs) return samples, labels # prepare training pairs (factor graph, node states) samples, labels = prepare_data(p_tr, l_tr, ftype_all, n_tr_samples) try: import networkx as nx # pip install networkx except ImportError: import pip pip.main(['install', '--user', 'networkx']) import networkx as nx import matplotlib.pyplot as plt # create a graph G = nx.Graph() node_pos = {} # add variable nodes, assuming there are 3 letters G.add_nodes_from(['v0','v1','v2']) for i in xrange(3): node_pos['v%d' % i] = (2*i,1) # add factor nodes G.add_nodes_from(['F0','F1','F2','F01','F12','Fs','Ft']) for i in xrange(3): node_pos['F%d' % i] = (2*i,1.006) for i in xrange(2): node_pos['F%d%d' % (i,i+1)] = (2*i+1,1) node_pos['Fs'] = (-1,1) node_pos['Ft'] = (5,1) # add edges to connect variable nodes and factor nodes G.add_edges_from([('v%d' % i,'F%d' % i) for i in xrange(3)]) G.add_edges_from([('v%d' % i,'F%d%d' % (i,i+1)) for i in xrange(2)]) G.add_edges_from([('v%d' % (i+1),'F%d%d' % (i,i+1)) for i in xrange(2)]) G.add_edges_from([('v0','Fs'),('v2','Ft')]) # draw graph fig, ax = plt.subplots(figsize=(6,2)) nx.draw_networkx_nodes(G,node_pos,nodelist=['v0','v1','v2'],node_color='white',node_size=700,ax=ax) nx.draw_networkx_nodes(G,node_pos,nodelist=['F0','F1','F2'],node_color='yellow',node_shape='s',node_size=300,ax=ax) nx.draw_networkx_nodes(G,node_pos,nodelist=['F01','F12'],node_color='blue',node_shape='s',node_size=300,ax=ax) nx.draw_networkx_nodes(G,node_pos,nodelist=['Fs'],node_color='green',node_shape='s',node_size=300,ax=ax) nx.draw_networkx_nodes(G,node_pos,nodelist=['Ft'],node_color='purple',node_shape='s',node_size=300,ax=ax) nx.draw_networkx_edges(G,node_pos,alpha=0.7) plt.axis('off') plt.tight_layout() from modshogun import FactorGraphModel, TREE_MAX_PROD # create model and register factor types model = FactorGraphModel(samples, labels, TREE_MAX_PROD) model.add_factor_type(ftype_all[0]) model.add_factor_type(ftype_all[1]) model.add_factor_type(ftype_all[2]) model.add_factor_type(ftype_all[3]) from modshogun import DualLibQPBMSOSVM from modshogun import BmrmStatistics import pickle import time # create bundle method SOSVM, there are few variants can be chosen # BMRM, Proximal Point BMRM, Proximal Point P-BMRM, NCBM # usually the default one i.e. BMRM is good enough # lambda is set to 1e-2 bmrm = DualLibQPBMSOSVM(model, labels, 0.01) bmrm.set_TolAbs(20.0) bmrm.set_verbose(True) # train t0 = time.time() bmrm.train() t1 = time.time() w_bmrm = bmrm.get_w() print "BMRM took", t1 - t0, "seconds." import matplotlib.pyplot as plt fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) primal_bmrm = bmrm.get_helper().get_primal_values() dual_bmrm = bmrm.get_result().get_hist_Fd_vector() # plot duality gaps xs = range(dual_bmrm.size) axes[0].plot(xs, (primal_bmrm-dual_bmrm), label='duality gap') axes[0].set_xlabel('iteration') axes[0].set_ylabel('duality gap') axes[0].legend(loc=1) axes[0].set_title('duality gaps'); axes[0].grid(True) # plot primal and dual values xs = range(dual_bmrm.size-1) axes[1].plot(xs, primal_bmrm[1:], label='primal') axes[1].plot(xs, dual_bmrm[1:], label='dual') axes[1].set_xlabel('iteration') axes[1].set_ylabel('objective') axes[1].legend(loc=1) axes[1].set_title('primal vs dual'); axes[1].grid(True) # statistics bmrm_stats = bmrm.get_result() nCP = bmrm_stats.nCP nzA = bmrm_stats.nzA print 'number of cutting planes: %d' % nCP print 'number of active cutting planes: %d' % nzA from modshogun import StochasticSOSVM # the 3rd parameter is do_weighted_averaging, by turning this on, # a possibly faster convergence rate may be achieved. # the 4th parameter controls outputs of verbose training information sgd = StochasticSOSVM(model, labels, True, True) sgd.set_num_iter(100) sgd.set_lambda(0.01) # train t0 = time.time() sgd.train() t1 = time.time() w_sgd = sgd.get_w() print "SGD took", t1 - t0, "seconds." fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) primal_sgd = sgd.get_helper().get_primal_values() xs = range(dual_bmrm.size-1) axes[0].plot(xs, primal_bmrm[1:], label='BMRM') axes[0].plot(range(99), primal_sgd[1:100], label='SGD') axes[0].set_xlabel('effecitve passes') axes[0].set_ylabel('primal objective') axes[0].set_title('whole training progress') axes[0].legend(loc=1) axes[0].grid(True) axes[1].plot(range(99), primal_bmrm[1:100], label='BMRM') axes[1].plot(range(99), primal_sgd[1:100], label='SGD') axes[1].set_xlabel('effecitve passes') axes[1].set_ylabel('primal objective') axes[1].set_title('first 100 effective passes') axes[1].legend(loc=1) axes[1].grid(True) fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) terr_bmrm = bmrm.get_helper().get_train_errors() terr_sgd = sgd.get_helper().get_train_errors() xs = range(terr_bmrm.size-1) axes[0].plot(xs, terr_bmrm[1:], label='BMRM') axes[0].plot(range(99), terr_sgd[1:100], label='SGD') axes[0].set_xlabel('effecitve passes') axes[0].set_ylabel('training error') axes[0].set_title('whole training progress') axes[0].legend(loc=1) axes[0].grid(True) axes[1].plot(range(99), terr_bmrm[1:100], label='BMRM') axes[1].plot(range(99), terr_sgd[1:100], label='SGD') axes[1].set_xlabel('effecitve passes') axes[1].set_ylabel('training error') axes[1].set_title('first 100 effective passes') axes[1].legend(loc=1) axes[1].grid(True) def hinton(matrix, max_weight=None, ax=None): """Draw Hinton diagram for visualizing a weight matrix.""" ax = ax if ax is not None else plt.gca() if not max_weight: max_weight = 2**np.ceil(np.log(np.abs(matrix).max())/np.log(2)) ax.patch.set_facecolor('gray') ax.set_aspect('equal', 'box') ax.xaxis.set_major_locator(plt.NullLocator()) ax.yaxis.set_major_locator(plt.NullLocator()) for (x,y),w in np.ndenumerate(matrix): color = 'white' if w > 0 else 'black' size = np.sqrt(np.abs(w)) rect = plt.Rectangle([x - size / 2, y - size / 2], size, size, facecolor=color, edgecolor=color) ax.add_patch(rect) ax.autoscale_view() ax.invert_yaxis() # get pairwise parameters, also accessible from # w[n_dims*n_stats:n_dims*n_stats+n_stats*n_stats] model.w_to_fparams(w_sgd) # update factor parameters w_p = ftype_all[1].get_w() w_p = np.reshape(w_p,(n_stats,n_stats)) hinton(w_p) # get testing data samples_ts, labels_ts = prepare_data(p_ts, l_ts, ftype_all, n_ts_samples) from modshogun import FactorGraphFeatures, FactorGraphObservation, TREE_MAX_PROD, MAPInference # get a factor graph instance from test data fg0 = samples_ts.get_sample(100) fg0.compute_energies() fg0.connect_components() # create a MAP inference using tree max-product infer_met = MAPInference(fg0, TREE_MAX_PROD) infer_met.inference() # get inference results y_pred = infer_met.get_structured_outputs() y_truth = FactorGraphObservation.obtain_from_generic(labels_ts.get_label(100)) print y_pred.get_data() print y_truth.get_data() from modshogun import LabelsFactory, SOSVMHelper # training error of BMRM method bmrm.set_w(w_bmrm) model.w_to_fparams(w_bmrm) lbs_bmrm = LabelsFactory.to_structured(bmrm.apply()) acc_loss = 0.0 ave_loss = 0.0 for i in xrange(n_tr_samples): y_pred = lbs_bmrm.get_label(i) y_truth = labels.get_label(i) acc_loss = acc_loss + model.delta_loss(y_truth, y_pred) ave_loss = acc_loss / n_tr_samples print('BMRM: Average training error is %.4f' % ave_loss) # training error of stochastic method print('SGD: Average training error is %.4f' % SOSVMHelper.average_loss(w_sgd, model)) # testing error bmrm.set_features(samples_ts) bmrm.set_labels(labels_ts) lbs_bmrm_ts = LabelsFactory.to_structured(bmrm.apply()) acc_loss = 0.0 ave_loss_ts = 0.0 for i in xrange(n_ts_samples): #result = model.argmax(w,i) #y_pred = FactorGraphObservation.obtain_from_generic(result.argmax) y_pred = lbs_bmrm_ts.get_label(i) y_truth = labels_ts.get_label(i) acc_loss = acc_loss + model.delta_loss(y_truth, y_pred) ave_loss_ts = acc_loss / n_ts_samples print('BMRM: Average testing error is %.4f' % ave_loss_ts) # testing error of stochastic method print('SGD: Average training error is %.4f' % SOSVMHelper.average_loss(sgd.get_w(), model))