rcParams['figure.figsize'] = 12, 8 def dataset(n, k): xy = random.uniform(low=-2, high=2, size=(n, 2)) centers = random.uniform(low=-2, high=2, size=(k, 2)) classes = [] for p in xy: # 1/dist^3, so that lower dist => higher prob dists = asarray([1. / linalg.norm(p - c) ** 3 for c in centers]) dists = cumsum(dists / sum(dists)) classes.append(where(random.random() < dists)[0][0]) return column_stack([xy, classes]), centers random.seed(1) # k=4, seed=12 k = 3 points, centers = dataset(1000, k) colors = 'gbrcmykrcm' markers = 'osd^v1234x' for i in range(k): class_i = points[where(points[:,2] == i)] marker = colors[i % len(colors)] + markers[i % len(markers)] plot(class_i[:,0], class_i[:,1], marker) plot([centers[i,0]], [centers[i,1]], marker, ms=20) def softmax(x, theta): n = len(x) k = len(theta) sum_exps = sum(exp(asarray([dot(x, theta[i]) for i in range(k)])), axis=0) assert sum_exps.shape == (n,) return asarray([exp(dot(x, theta[i])) / sum_exps for i in range(k)]) # k x n, where each col should sum to 1 def train(points, initial_theta, alpha=0.001, stop=1e-4): x, y, c = points[:,0], points[:,1], points[:,2] n = len(points) k = len(initial_theta) bxy = column_stack([ones(n), x, y]) # first component is intercept (i.e. all 1s) thetas = [initial_theta] nll_errors = [] classif_errors = [] while True: theta = thetas[-1][:] # copy h = softmax(bxy, theta) nll_errors.append(-sum(log(h.T[arange(n), c.astype(int)])) / n) classif_errors.append(sum((argmax(h, axis=0) != c).astype(int))) if len(nll_errors) > 1 and nll_errors[-2] - nll_errors[-1] < stop: break for i in range(k): ci = (c == i).astype(int) theta[i][0] -= alpha * sum(h[i] - ci) # intercept component theta[i][1] -= alpha * sum((h[i] - ci) * x) theta[i][2] -= alpha * sum((h[i] - ci) * y) thetas.append(theta) return thetas, nll_errors, classif_errors initial_theta = [random.sample(3) for _ in range(k - 0)] thetas, nll_errors, classif_errors = train(points, initial_theta) #print nll_errors #print classif_errors xlabel('Training iterations') ylabel('NLL') plot(range(len(nll_errors)), nll_errors, 'r-'); xlabel('Training iterations') ylabel('Classification error') plot(range(len(classif_errors)), classif_errors, 'r-') ylim(0); def general_to_slope_intercept(theta): slope = -theta[1] / theta[2] intercept = -theta[0] / theta[2] return slope, intercept best_theta = thetas[-1] for i in range(k): class_i = points[where(points[:,2] == i)] marker = colors[i % len(colors)] + markers[i % len(markers)] plot(class_i[:,0], class_i[:,1], marker) plot([centers[i,0]], [centers[i,1]], marker, ms=20) slope, intercept = general_to_slope_intercept(best_theta[i]) plot(linspace(-2, 2), slope * linspace(-2, 2) + intercept, 'r-', linewidth=2) axis((-2, 2, -2, 2));