#!/usr/bin/env python
# coding: utf-8

# In[44]:

# Notebook export: the %pylab magic supplies the `pylab` name used below, so
# this script must be run under IPython.
get_ipython().run_line_magic('pylab', 'inline')
import numpy as np
import irm
import seaborn as sns
import sys
sys.path.append("../code/")  # must precede the cvpipelineutil import
import cvpipelineutil
import sklearn.metrics


# # synthetic data, no distance-dependence

# In[45]:

true_classes = ['gradstudent', 'postdoc', 'faculty', 'vendor']

MAXDIST = 100

# (source class, target class) -> 2-tuple handed to the data generator.
# Presumably (distance scale, talk probability); every pair here uses the same
# MAXDIST -- TODO confirm against irm.data.generate.c_class_neighbors.
# NOTE(review): not every ordered pair of classes is listed; how the generator
# treats a missing pair is not visible from this file.
tries_talk_probs = {
    ('gradstudent', 'gradstudent'): (MAXDIST, 0.5),
    ('gradstudent', 'postdoc'): (MAXDIST, 0.2),
    ('gradstudent', 'faculty'): (MAXDIST, 0.1),
    ('gradstudent', 'vendor'): (MAXDIST, 0.5),
    ('postdoc', 'postdoc'): (MAXDIST, 0.7),
    ('postdoc', 'faculty'): (MAXDIST, 0.7),
    ('faculty', 'faculty'): (MAXDIST, 0.9),
    ('vendor', 'gradstudent'): (MAXDIST, 0.2),
    ('vendor', 'postdoc'): (MAXDIST, 0.4),
    ('vendor', 'faculty'): (MAXDIST, 0.7),
    ('vendor', 'vendor'): (MAXDIST, 0.1),
}

# turn into numeric types: replace each class name by its position in
# true_classes.  .items() (rather than .iteritems()) runs on Python 2 and 3.
ttp_numeric = {}
for (src_class, dst_class), params in tries_talk_probs.items():
    key = (true_classes.index(src_class), true_classes.index(dst_class))
    ttp_numeric[key] = params


# In[46]:

np.random.seed(0)

nodes, conmat = irm.data.generate.c_class_neighbors(10, ttp_numeric,
                                                    JITTER=0.1)
np.fill_diagonal(conmat, 0)  # don't try and talk to yourself

# randomize the order: apply one permutation to the node records and to both
# axes of the connectivity matrix so they stay aligned
ca = np.random.permutation(len(nodes))
nodes = nodes[ca]
conmat = conmat[ca]
conmat = conmat[:, ca]


# In[47]:

pylab.imshow(conmat, interpolation='nearest')
pylab.grid(False)
pylab.ylabel('e_i')
pylab.xlabel('e_j')
pylab.title("e_i tries to talk to e_j")
pylab.xticks([])
pylab.yticks([])
pylab.savefig("nodist.data.png")


# In[48]:

# inference?
conmat = conmat.astype(np.uint8)

# Fit the distance-free Beta-Bernoulli relational model to the link matrix.
model_name = "BetaBernoulliNonConj"
init, data = irm.irmio.default_graph_init(conmat, model_name)
# Initial group id per entity: a random permutation of the entity indices
# folded into 0..99 (with fewer than 100 entities each starts in its own
# group).
init['domains']['d1']['assignment'] = np.random.permutation(len(conmat)) % 100

# Only alpha/beta are passed as hyperparameters for this model; the unused
# mulamb / p_max assignments that used to sit here were dropped.
HPS = {'alpha': 1.0, 'beta': 1.0}
init['relations']['R1']['hps'] = HPS

slow_anneal = irm.runner.default_kernel_anneal(iterations=10)
kc = {'ITERS': 20, 'kernels': slow_anneal}

s = cvpipelineutil.run_exp_pure(data, init, 'anneal_slow_10', 0, kc)


# In[49]:

res = s['res']
scores, state, times, _ = res


# In[50]:

# Reorder rows and columns so entities with the same inferred group id are
# adjacent, then draw the reordered link matrix.
a = irm.util.canonicalize_assignment(state['domains']['d1']['assignment'])
ca = np.argsort(a).flatten()
conmat_sorted = conmat[ca]
conmat_sorted = conmat_sorted[:, ca]

pylab.imshow(conmat_sorted, interpolation='nearest')
pylab.grid(False)
pylab.ylabel('e_i')
pylab.xlabel('e_j')
pylab.title("e_i tries to talk to e_j")

# Indices where the sorted group id changes between neighbours.
# NOTE(review): with imshow's default extent pixel i is centred on coordinate
# i, so these lines run through the last row/column of each group rather than
# between groups; p + 0.5 would sit on the boundary -- confirm intent.
pos = np.argwhere(np.diff(a[ca]))
for p in pos:
    pylab.axhline(p, c='b', alpha=0.2)
    pylab.axvline(p, c='b', alpha=0.2)

pylab.xticks([])
pylab.yticks([])
pylab.savefig("nodist.recovered.png")


# # synthetic data, distance-dependence

# In[51]:

true_classes = ['gradstudent', 'postdoc', 'faculty', 'vendor']

# Same layout as the no-distance table, but the first tuple entry now varies
# per class pair instead of being a shared constant (presumably a distance
# scale -- TODO confirm against irm.data.generate.c_class_neighbors).
tries_talk_probs = {
    ('gradstudent', 'gradstudent'): (2.0, 0.5),
    ('gradstudent', 'postdoc'): (1.0, 0.5),
    ('gradstudent', 'faculty'): (3.0, 0.8),
    ('gradstudent', 'vendor'): (0.8, 0.5),
    ('postdoc', 'postdoc'): (1.0, 0.8),
    ('postdoc', 'faculty'): (2.0, 0.8),
    ('faculty', 'faculty'): (2.0, 0.9),
    ('vendor', 'gradstudent'): (0.3, 0.2),
    ('vendor', 'postdoc'): (3.0, 0.5),
    ('vendor', 'faculty'): (3.0, 0.5),
    ('vendor', 'vendor'): (1.0, 0.1),
}

# turn into numeric types: replace each class name by its position in
# true_classes.  .items() (rather than .iteritems()) runs on Python 2 and 3.
ttp_numeric = {}
for (src_class, dst_class), params in tries_talk_probs.items():
    key = (true_classes.index(src_class), true_classes.index(dst_class))
    ttp_numeric[key] = params


# In[52]:
np.random.seed(1)

nodes, conmat = irm.data.generate.c_class_neighbors(10, ttp_numeric,
                                                    JITTER=0.1)
np.fill_diagonal(conmat, 0)  # don't try and talk to yourself

# randomize the order: apply one permutation to the node records and to both
# axes of the connectivity matrix so they stay aligned
ca = np.random.permutation(len(nodes))
nodes = nodes[ca]
conmat = conmat[ca]
conmat = conmat[:, ca]


# In[53]:

dist_conmat = conmat.copy()
pylab.imshow(dist_conmat, interpolation='nearest')
pylab.grid(False)
pylab.ylabel('e_i')
pylab.xlabel('e_j')
pylab.xticks([])
pylab.yticks([])
pylab.title("e_i tries to talk to e_j")
pylab.savefig("dist.data.png")


# ### Inference with no distance dependence

# In[54]:

conmat = conmat.astype(np.uint8)

model_name = "BetaBernoulliNonConj"
init, data = irm.irmio.default_graph_init(conmat, model_name)
# Initial group id per entity: a random permutation of the entity indices
# folded into 0..99.
init['domains']['d1']['assignment'] = np.random.permutation(len(conmat)) % 100

HPS = {'alpha': 1.0, 'beta': 1.0}
init['relations']['R1']['hps'] = HPS

slow_anneal = irm.runner.default_kernel_anneal(iterations=10)
kc = {'ITERS': 30, 'kernels': slow_anneal}

s = cvpipelineutil.run_exp_pure(data, init, 'anneal_slow_10', 0, kc)
res = s['res']
scores, state, times, _ = res


# In[55]:

# Reorder rows and columns so entities with the same inferred group id are
# adjacent, then draw the reordered link matrix.
a = irm.util.canonicalize_assignment(state['domains']['d1']['assignment'])
ca = np.argsort(a).flatten()
conmat_sorted = conmat[ca]
conmat_sorted = conmat_sorted[:, ca]

pylab.imshow(conmat_sorted, interpolation='nearest')
pylab.grid(False)
pylab.ylabel('e_i')
pylab.xlabel('e_j')

# Indices where the sorted group id changes between neighbours.
# NOTE(review): with imshow's default extent pixel i is centred on coordinate
# i, so these lines run through the last row/column of each group rather than
# between groups; p + 0.5 would sit on the boundary -- confirm intent.
pos = np.argwhere(np.diff(a[ca]))
for p in pos:
    pylab.axhline(p, c='b', alpha=0.2)
    pylab.axvline(p, c='b', alpha=0.2)

pylab.xticks([])
pylab.yticks([])
pylab.title("e_i tries to talk to e_j")
pylab.savefig("dist.nodistrecover.png")


# In[56]:

print(np.unique(a))


# In[57]:

# A bare expression is only echoed inside a notebook; print it so the table
# is also visible when this file runs as a script.
print(sklearn.metrics.confusion_matrix(nodes['class'], a))


# In[58]:

def dist(a, b):
    """Return the Euclidean distance between points *a* and *b*.

    Computed as the square root of the sum of the squared element-wise
    differences ``b - a``.
    """
    return np.sqrt(np.sum((b - a) ** 2))


# One record per ordered entity pair: the observed link bit plus the
# distance between the two entities' 'pos' fields.
conn_and_dist = np.zeros(conmat.shape,
                         dtype=[('link', np.uint8),
                                ('distance', np.float32)])
# %-formatting keeps the output identical on Python 2 and Python 3.
print("conn_and_dist.dtype %s" % (conn_and_dist.dtype,))

# The link field is a straight copy of the connectivity matrix.
conn_and_dist['link'] = conmat
for ni, node_i in enumerate(nodes):
    for nj, node_j in enumerate(nodes):
        conn_and_dist['distance'][ni, nj] = dist(node_i['pos'],
                                                 node_j['pos'])

model_name = "LogisticDistance"
init, data = irm.irmio.default_graph_init(conn_and_dist, model_name)
init['domains']['d1']['assignment'] = np.random.permutation(len(conmat)) % 100

mulamb = 1.0
p_max = 0.95

HPS = {'mu_hp': mulamb,
       'lambda_hp': mulamb,
       'p_min': 0.01,
       'p_max': p_max}
init['relations']['R1']['hps'] = HPS

slow_anneal = irm.runner.default_kernel_anneal(iterations=10)
kc = {'ITERS': 30, 'kernels': slow_anneal}

s = cvpipelineutil.run_exp_pure(data, init, 'anneal_slow_10', 0, kc)
res = s['res']
scores, state, times, _ = res


# In[59]:

a = irm.util.canonicalize_assignment(state['domains']['d1']['assignment'])
ca = np.argsort(a).flatten()
conmat_sorted = conn_and_dist['link'][ca]
conmat_sorted = conmat_sorted[:, ca]

pylab.imshow(conmat_sorted, interpolation='nearest')
pylab.grid(False)
pylab.ylabel('e_i')
pylab.xlabel('e_j')

pos = np.argwhere(np.diff(a[ca]))
for p in pos:
    pylab.axhline(p, c='b', alpha=0.2)
    pylab.axvline(p, c='b', alpha=0.2)

pylab.xticks([])
pylab.yticks([])
pylab.title("e_i tries to talk to e_j")
pylab.savefig("dist.distrecover.png")


# In[60]:

print(sklearn.metrics.confusion_matrix(nodes['class'], a))