In [2]:

```
import pandas as pd
import numpy as np
import scipy.stats
from __future__ import division
from sklearn import datasets
from sklearn.decomposition import FactorAnalysis
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
%pylab inline
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) # annoying pandas bug
```

**https://github.com/psztorc/Truthcoin**

- no reputation / stake. uniform vote weights
- only binary events / discrete outcomes.

In [21]:

```
# events are columns of the voter matrix:
# col[0] = Obama is the u.s. president (2014),
# col[1] = Brazil won the 2014 fifa world cup,
# col[2] = Djokovic won the 2014 wimbledon tennis championship,
# col[3] = MtGox exchange goes insolvent (1Q 2014)
# col[4] = Professor bitcorn won his bet ("I predict that Bitcoin will trade for under $10 a share by the first half of 2014")
#
# rows are voters; an entry of 1 means that voter reports the event as true,
# 0 as false (binary outcomes only, per the notebook's premise)
VoterMatrix = np.matrix([
[1, 0, 1, 1, 0], # first voter
[1, 1, 1, 1, 0], # ignorant about sports
[1, 0, 1, 1, 0],
[0, 0, 1, 1, 0], # republican in denial
[1, 0, 1, 1, 1]]) # prof bitcorn
print VoterMatrix
# pandas view of the same matrix with named outcome columns; `features` is
# reused by the normalization and covariance cells below
features = ['outcome_1', 'outcome_2', 'outcome_3', 'outcome_4', 'outcome_5']
voteMatrix_pd = pd.DataFrame(VoterMatrix, columns=features)
#raw['class'] = y
print ' voteMatrix_pd:'
print voteMatrix_pd
```

- using a uniform reputation / voter weighting

In [16]:

```
def reWeight(Vec):
    """Get the relative influence of numbers, treating NaN as influence-less.

    Parameters
    ----------
    Vec : array-like of numbers (may contain NaN)

    Returns
    -------
    np.ndarray of floats scaled to sum to 1; NaN entries become 0.
    If the non-NaN entries sum to 0, the zero vector is returned instead of
    dividing by zero (the original produced NaN/inf in that case).
    """
    vec2 = np.array(Vec, dtype=float)
    vec2[np.isnan(vec2)] = 0          # NaN -> no influence (vectorized; the
                                      # original relied on pylab's bare isnan)
    vec2sum = vec2.sum()
    if vec2sum != 0:                  # guard against an all-zero/all-NaN input
        vec2 = vec2 / vec2sum
    return(vec2)
# smoke test: a uniform vector should reweight to equal influence (0.25 each)
rew = reWeight(np.array([1,1,1,1]))
print "reweighted vector test. uniform vector", rew
def getWeight(Vec, AddMean=0):
    """Takes an array (vector in practice), and returns proportional distance from zero.

    Parameters
    ----------
    Vec : array-like of numbers
    AddMean : int, optional
        If 1, add the mean of abs(Vec) to every element before normalizing
        (pulls the weights toward uniform).

    Returns
    -------
    abs(Vec) normalized to sum to 1 (same shape as the input array).
    An all-zero input falls back to uniform weights.

    Uses explicit np.mean/np.sum: the original used bare `mean`/`sum`, which
    only resolved to numpy via the %pylab star-import (and `sum` is ambiguous
    with the builtin).
    """
    New = np.abs(Vec)                 # absolute value
    if AddMean == 1:                  # add the mean to each element
        New = New + np.mean(New)
    if np.sum(New) == 0:              # catch the all-zero vector
        New = New + 1
    New = New / np.sum(New)           # normalize
    return(New)
# one unit of weight per voter, as a column vector (len == number of voters);
# NOTE(review): bare `array` resolves via the %pylab star-import (= np.array)
uniformWeight = array([[1]]*len(VoterMatrix))
print "\nuniform weights:\n", uniformWeight
# normalized -> each of the 5 voters holds an equal 1/5 = 0.2 reputation share
uniformReputation = getWeight(uniformWeight)
print "\nuniform reputation:\n", uniformReputation
```

measure each decision by taking a dot product. essentially this just uses the average vote value among all voters.

- an SVD result would change the reputation vector, but without SVD it's a simple uniform vector (all votes equal).

In [11]:

```
# port of GetDecisionOutcomes() https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L139
# VoterMatrix
# we're using the regular matrix here. data is not even zero-centered.
MaskedVoterMatrix = np.ma.masked_array(VoterMatrix, isnan(VoterMatrix))
matrix_mask_thingie = -MaskedVoterMatrix[...,0].mask
# not sure what the mask is for.
# corresponds to https://github.com/psztorc/Truthcoin/blob/master/pylib/consensus/consensus.py#L113-L114
row = reWeight( rep [ matrix_mask_thingie ] )
print "row:", row
col = MaskedVoterMatrix[matrix_mask_thingie, 0]
print "col:", col
decisions = []
for i in range(VoterMatrix.shape[1]):
row = reWeight( rep [ -MaskedVoterMatrix[...,i].mask ] )
col = MaskedVoterMatrix[ -MaskedVoterMatrix[...,i].mask, i]
col = np.array(col, dtype=float)
row = np.transpose(row)[0]
decisions.append(np.dot(col, row))
print "\ndecisions:"
print decisions
```

**that's the output for a simplified multi-decision resolution from votes on binary outcomes.**

- the final step would map each value in [0, 1] to one of {0, 0.5, 1}

**in this simplified version there are no reputation or vote stake amounts. every vote is equal ([0.2, 0.2, 0.2, 0.2, 0.2]).**

an extended method would incorporate vote stake/deposit amounts to weigh votes. if we also add scaled/continuous outcomes then the resulting consensus method would be a form of multi-decision SchellingCoin (or equivalently, TruthCoin without reputation).

- here SVD is applied to the covariance matrix; computing the covariance requires a data matrix of normalized continuous values

In [23]:

```
# normalize each feature/column to mean = 0, std = 1
# data matrix needs to be normalized to get covariance and SVD
normed = voteMatrix_pd.copy()
for col in features:
#normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()) / normed[col].std())
normed[col] = normed[col].apply(lambda x: (x - normed[col].mean()))
# normed data matrix is only zero-centered (not auto-scaled)
print '\nNormalized dataset:'
print normed[:5]
```

this covariance calc is from the original pca example

In [24]:

```
### since our data is already normalized, cov(x1, x2) = sum(x1*x2) / num_observations
# ^^ old assumption from the original example. valid here??
cov_df = pd.DataFrame(index=features)
for colA in features:
column = []
for colB in features:
cov = normed[colA].cov(normed[colB])
column.append(cov)
cov_df[colA] = column
print 'Covariance matrix:'
print cov_df
# everybody agrees on outcomes 3 & 4 (tennis winner, mtgox solvency), so those columns have zero variance
```

**[U]: Rows are the original features and columns are the PCA 'components'. Each cell gives the 'loading' of the feature on the corresponding component.**

**[S]: Represents how much variance is explained by each component.**

In [8]:

```
# use numpy's SVD implementation
u, s, v = scipy.linalg.svd(cov_df)
print 'U: (feature loading for each component)'
print pd.DataFrame(u, index=features) # first loading
print '\nExplained variance:\n', s
firstScore = np.transpose(np.dot(cov_df, u))[0]
print "\nfirstScore:"
print firstScore
Set1 = firstScore + abs(min(firstScore))
print "\nSet1:"
print Set1
Set2 = firstScore - max(firstScore)
print "Set2:"
print Set2
```

In [10]:

```
# note on these two sets: https://github.com/psztorc/Truthcoin/blob/master/lib/consensus/ConsensusMechanism.r#L40-L51
# NOTE(review): appears to score each outcome column against both candidate
# signs of the first component and renormalize with getWeight -- confirm the
# intended interpretation against the linked R source
New1 = getWeight(np.dot(Set1, voteMatrix_pd))
print "\nNew1:"
print New1
New2 = getWeight(np.dot(Set2, voteMatrix_pd))
print "New2:"
print New2
```

In [ ]:

```
```