!date import numpy as np, pandas as pd, pymc as pm, matplotlib.pyplot as plt, seaborn as sns %matplotlib inline sns.set_context('paper') sns.set_style('darkgrid') np.random.seed(12345) # set random seed for reproducibility #### simulate votes n = 450 # number of voters p = 30 # number of votes # latent variables for how and how often voters vote position = np.random.normal(size=n) activity = np.random.normal(size=n) # simulate complete data of how voters vote # and observed data of how/whether they vote complete_votes = np.empty((n,p)) observed_votes = np.empty((n,p)) for i in range(n): for j in range(p): if np.random.rand() < np.exp(position[i]) / (1 + np.exp(position[i])): complete_votes[i,j] = 0 else: complete_votes[i,j] = 1 if np.random.rand() < np.exp(activity[i]) / (1 + np.exp(activity[i])): observed_votes[i,j] = 9 else: observed_votes[i,j] = complete_votes[i,j] print observed_votes[:10,:10] import sklearn.preprocessing X = sklearn.preprocessing.OneHotEncoder().fit_transform(observed_votes) import sklearn.decomposition X_2d = sklearn.decomposition.PCA(n_components=2).fit_transform(X.toarray()) plt.plot(X_2d[:,0], activity, 'o') plt.plot(X_2d[:,1], position, 'o') plt.plot(X_2d[:,0], X_2d[:,1], 'o')