"""Linear regression under covariate shift: OLS versus weighted least squares.

Training inputs are drawn from N(0.5, 0.5^2) and test inputs from
N(0.0, 0.3^2); in both cases the response is ``y = -x + x**3 + noise``.
A straight line is fitted to the training data twice -- once with ordinary
least squares and once with each sample weighted by a Gaussian-shaped
weight that stands in for the density ratio q1(x)/q0(x) -- and the fits are
plotted against the true curve.  A second figure shows the test sample with
an OLS line fitted directly to it.

The original notebook relied on ``%pylab inline`` to inject ``ones``,
``shape``, ``exp`` and ``numpy`` into the namespace; every NumPy name is now
referenced explicitly through ``np``.
"""
import numpy as np

# Number of samples drawn for each data set (also the plotting grid size).
N_SAMPLES = 100

# Input distributions: q0 for the training set, q1 for the test set.
TRAIN_MU, TRAIN_SIGMA = 0.5, 0.5
TEST_MU, TEST_SIGMA = 0.0, 0.3

# Additive observation noise.
NOISE_MU, NOISE_SIGMA = 0.0, 0.3

# Parameters of the Gaussian-shaped weight.  For the two normal densities
# above, q1(x)/q0(x) is proportional to exp(-(x - mu)^2 / (2 sigma^2)) with
# sigma^2 = 1 / (1/0.3^2 - 1/0.5^2) = 0.375^2 and mu = -0.28125; the values
# below are those figures rounded to two decimals.
WEIGHT_MU, WEIGHT_SIGMA = -0.28, 0.38


def true_curve(x):
    """Return the noise-free response ``-x + x**3`` for *x*."""
    x = np.asarray(x, dtype=float)
    return -x + x ** 3


def add_intercept(x):
    """Return the design matrix ``[x, 1]`` for a 1-D input array *x*.

    The slope column comes first and the constant column second, so a
    coefficient vector fitted against it is ordered ``[slope, intercept]``.
    """
    x = np.asarray(x, dtype=float)
    return np.column_stack((x, np.ones(x.shape[0])))


def importance_weights(x, mu_w=WEIGHT_MU, sigma_w=WEIGHT_SIGMA):
    """Return the unnormalised weights ``exp(-(x - mu_w)^2 / (2 sigma_w^2))``.

    Only relative magnitudes matter for the weighted fit, so the constant
    factor of the density ratio is omitted.
    """
    x = np.asarray(x, dtype=float)
    return np.exp(-((x - mu_w) ** 2) / (2.0 * sigma_w ** 2))


def fit_linear(x, y, weights=None):
    """Fit ``y ~ slope * x + intercept`` by (weighted) least squares.

    Args:
        x: 1-D array of inputs.
        y: 1-D array of responses, same length as *x*.
        weights: optional 1-D array of non-negative per-sample weights.
            ``None`` gives ordinary least squares.

    Returns:
        Array ``[slope, intercept]`` minimising
        ``sum(w_i * (y_i - slope * x_i - intercept)^2)``.

    Raises:
        ValueError: if *y* or *weights* does not match the length of *x*.
        numpy.linalg.LinAlgError: if the normal equations are singular.
    """
    design = add_intercept(x)
    y = np.asarray(y, dtype=float)
    if y.shape[0] != design.shape[0]:
        raise ValueError("x and y must have the same length")

    if weights is None:
        weighted_design = design
    else:
        weights = np.asarray(weights, dtype=float)
        if weights.shape[0] != design.shape[0]:
            raise ValueError("weights and x must have the same length")
        # Scale each ROW of the design matrix by its weight: X^T W == (W X)^T.
        # (Multiplying the regressor column itself by w, as the original
        # code did, fits a different model rather than a weighted one.)
        weighted_design = design * weights[:, np.newaxis]

    # Normal equations (X^T W X) theta = X^T W y, solved without forming an
    # explicit inverse.
    gram = np.dot(weighted_design.T, design)
    moment = np.dot(weighted_design.T, y)
    return np.linalg.solve(gram, moment)


def predict_linear(x, theta):
    """Evaluate the line with coefficients ``[slope, intercept]`` at *x*."""
    return np.dot(add_intercept(x), theta)


def sample_data(mu_x, sigma_x, n=N_SAMPLES):
    """Draw *n* inputs from N(mu_x, sigma_x^2) and their noisy responses.

    Fresh noise is drawn on every call so that separate data sets are
    independent of each other.

    Returns:
        Tuple ``(x, y)`` of 1-D arrays of length *n*.
    """
    x = np.random.normal(mu_x, sigma_x, n)
    noise = np.random.normal(NOISE_MU, NOISE_SIGMA, n)
    return x, true_curve(x) + noise


def _enable_inline_backend():
    """Switch matplotlib to the inline backend when running under IPython.

    Stands in for the ``%pylab inline`` magic of the original notebook while
    keeping this file valid plain Python.
    """
    try:
        shell = get_ipython()  # noqa: F821 -- only defined inside IPython
    except NameError:
        return
    if shell is not None:
        shell.run_line_magic("matplotlib", "inline")


def _plot_sample_and_truth(plt, x, y, n=N_SAMPLES):
    """Open a new figure with the scatter of (x, y) and the true curve.

    Returns:
        The evenly spaced grid over ``[min(x), max(x)]`` used for the curve,
        so callers can draw fitted lines on the same abscissae.
    """
    plt.figure()
    plt.plot(x, y, '*r')
    plt.xlabel('x')  # the original called ylabel twice, leaving x unlabelled
    plt.ylabel('y')
    grid = np.linspace(np.min(x), np.max(x), n)
    plt.plot(grid, true_curve(grid), 'b', label='true')
    return grid


def main():
    """Generate both data sets, fit the lines and draw the two figures."""
    _enable_inline_backend()
    # Imported here so the numerical helpers above stay importable on
    # machines without a plotting stack.
    import matplotlib.pyplot as plt

    # --- training set -----------------------------------------------------
    x_train, y_train = sample_data(TRAIN_MU, TRAIN_SIGMA)
    grid = _plot_sample_and_truth(plt, x_train, y_train)

    # OLS for d=1
    theta_ols = fit_linear(x_train, y_train)
    plt.plot(grid, predict_linear(grid, theta_ols), 'g', label='OLS')

    # weighted least squares with weights standing in for q1(x)/q0(x)
    theta_wls = fit_linear(x_train, y_train, importance_weights(x_train))
    plt.plot(grid, predict_linear(grid, theta_wls), 'm', label='WLS')

    plt.legend()
    plt.title('Training data set')

    # --- test set ---------------------------------------------------------
    x_test, y_test = sample_data(TEST_MU, TEST_SIGMA)
    grid = _plot_sample_and_truth(plt, x_test, y_test)

    # OLS for d=1, fitted directly on the test sample
    theta_test = fit_linear(x_test, y_test)
    plt.plot(grid, predict_linear(grid, theta_test), 'g', label='OLS')

    plt.legend()
    plt.title('Test data set')

    # No-op for display purposes under the inline backend; needed to show
    # the windows when run as a plain script.
    plt.show()


if __name__ == "__main__":
    main()

# NOTE: free-text answer that followed the code in the original file (it
# appears to belong to a separate question):
# I would assign probability of connections based on common features such as
# location, skills, etc.