import pandas as pd
import pandas.rpy.common as com
import numpy as np
from sklearn.feature_extraction import DictVectorizer
%load_ext autoreload
%autoreload 2
%load_ext rmagic
%matplotlib inline
%precision 2
pd.set_option('display.precision', 3)
import ndl
C:\Python27\lib\site-packages\IPython\extensions\rmagic.py:693: UserWarning: The rmagic extension in IPython is deprecated in favour of rpy2.ipython. If available, that will be loaded instead. http://rpy.sourceforge.net/ warnings.warn("The rmagic extension in IPython is deprecated in favour of "
%%R
library(ndl)
# Pull the `numbers` example data set in through the pandas/rpy bridge.
data = com.load_data('numbers')
# Each row's cues arrive as a single underscore-joined string; break it into
# a list of individual cue names.
data['Cues'] = data['Cues'].map(lambda cue_string: cue_string.split('_'))
# Keep a copy of the original outcome values under `Number`, because the
# `Outcomes` column is overwritten with category labels further down.
data['Number'] = data['Outcomes']
data
Cues | Outcomes | Frequency | Number | |
---|---|---|---|---|
1 | [size, shape, color, 1, exactly1] | 1 | 455 | 1 |
2 | [size, shape, color, 1, 2, exactly2] | 2 | 205 | 2 |
3 | [size, shape, color, 1, 2, 3, exactly3] | 3 | 107 | 3 |
4 | [size, shape, color, 1, 2, 3, 4, exactly4] | 4 | 60 | 4 |
5 | [size, shape, color, 1, 2, 3, 4, 5, exactly5] | 5 | 50 | 5 |
6 | [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6] | 6 | 36 | 6 |
7 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac... | 7 | 21 | 7 |
8 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e... | 8 | 20 | 8 |
9 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 9 | 13 | 9 |
10 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 10 | 16 | 10 |
11 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 11 | 3 | 11 |
12 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 12 | 4 | 12 |
13 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 13 | 2 | 13 |
14 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 14 | 2 | 14 |
15 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | 15 | 4 | 15 |
15 rows × 4 columns
def activation(W):
    """Build the activation table for every row of the module-level `data`.

    Calls ``ndl.activation(cues, W)`` once per entry of ``data.Cues`` and
    stacks the results into a DataFrame that shares ``data``'s index.

    W -- association weights as produced by ``ndl.rw``.

    NOTE(review): this reads the global ``data`` rather than taking the
    frame as an argument, so it always scores whatever ``data`` currently
    holds.
    """
    per_item = []
    for cue_set in data.Cues:
        per_item.append(ndl.activation(cue_set, W))
    return pd.DataFrame(per_item, index=data.index)
# Recode the outcome as a two-way contrast: every row starts as 'plural' and
# the row labelled 1 becomes 'singular'.
data['Outcomes'] = 'plural'
# Assign through .loc (row label, column name) so the write lands in `data`
# itself. Chained indexing (data['Outcomes'][1] = ...) assigns via an
# intermediate Series and pandas does not guarantee it reaches the frame.
data.loc[1, 'Outcomes'] = 'singular'
data
Cues | Outcomes | Frequency | Number | |
---|---|---|---|---|
1 | [size, shape, color, 1, exactly1] | singular | 455 | 1 |
2 | [size, shape, color, 1, 2, exactly2] | plural | 205 | 2 |
3 | [size, shape, color, 1, 2, 3, exactly3] | plural | 107 | 3 |
4 | [size, shape, color, 1, 2, 3, 4, exactly4] | plural | 60 | 4 |
5 | [size, shape, color, 1, 2, 3, 4, 5, exactly5] | plural | 50 | 5 |
6 | [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6] | plural | 36 | 6 |
7 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac... | plural | 21 | 7 |
8 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e... | plural | 20 | 8 |
9 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 13 | 9 |
10 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 16 | 10 |
11 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 3 | 11 |
12 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 4 | 12 |
13 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 2 | 13 |
14 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 2 | 14 |
15 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | plural | 4 | 15 |
15 rows × 4 columns
# Train one learner on the current `data` for M=10 trials.
# NOTE(review): `rw` is presumably a Rescorla-Wagner learner — confirm in ndl.
W = ndl.rw(data,M=10)
# Activation of each outcome for every row of `data` under the learned weights.
A = activation(W)
A
plural | singular | |
---|---|---|
1 | 0.09 | 0.29 |
2 | 0.13 | 0.23 |
3 | 0.16 | 0.22 |
4 | 0.14 | 0.23 |
5 | 0.14 | 0.23 |
6 | 0.14 | 0.23 |
7 | 0.14 | 0.23 |
8 | 0.14 | 0.23 |
9 | 0.14 | 0.23 |
10 | 0.14 | 0.23 |
11 | 0.14 | 0.23 |
12 | 0.14 | 0.23 |
13 | 0.14 | 0.23 |
14 | 0.14 | 0.23 |
15 | 0.14 | 0.23 |
15 rows × 2 columns
With these associations, how many of the 15 items will the learner correctly label?
# One row per item: the true label, the outcome with the highest activation,
# and whether the two agree.
pd.DataFrame(
    [data['Outcomes'], A.idxmax(axis=1), A.idxmax(axis=1) == data['Outcomes']],
    index=['Truth', 'Prediction', 'Accurate?']
).T
Truth | Prediction | Accurate? | |
---|---|---|---|
1 | singular | singular | True |
2 | plural | singular | False |
3 | plural | singular | False |
4 | plural | singular | False |
5 | plural | singular | False |
6 | plural | singular | False |
7 | plural | singular | False |
8 | plural | singular | False |
9 | plural | singular | False |
10 | plural | singular | False |
11 | plural | singular | False |
12 | plural | singular | False |
13 | plural | singular | False |
14 | plural | singular | False |
15 | plural | singular | False |
15 rows × 3 columns
# Share of items whose highest-activation outcome matches the true label
# (every item counts equally).
(A.idxmax(axis=1) == data['Outcomes']).mean()
0.07
How often is the learner correct when each item is weighted by its relative frequency?
# Frequency-weighted accuracy: total frequency of the correctly labelled
# items divided by the total frequency of all items.
float((data['Frequency'] * (A.idxmax(axis=1) == data['Outcomes'])).sum()) / float(data['Frequency'].sum())
0.46
def accuracy(data, M):
    """Train one learner on `data` for `M` trials and score its labelling.

    data -- frame with a ``Cues`` column (one list of cues per row) and an
            ``Outcomes`` column holding the true label of each row.
    M    -- number of trials, passed to ``ndl.rw`` as ``M``.

    Returns the fraction of rows whose highest-activation outcome equals the
    row's true ``Outcomes`` label.
    """
    W = ndl.rw(data, M=M)
    # Score against the frame that was passed in. Delegating to activation(W)
    # would read the module-level `data` instead, so a caller supplying a
    # different frame would train on one data set and be scored on another.
    A = pd.DataFrame([ndl.activation(c, W) for c in data.Cues],
                     index=data.index)
    return np.mean(A.idxmax(1) == data['Outcomes'])


accuracy(data, 10)
1.00
For a population of 100 learners trying to acquire the number system, what proportion are able to successfully label all 15 items given M trials?
# Run 100 separate learners for M=10 trials each and report the share that
# label every item correctly (accuracy exactly 1).
np.mean([accuracy(data, M=10) == 1 for i in xrange(100)])
0.32
def population_accuracy(M=10, pop=100):
    """Return the share of `pop` learners that label every item correctly.

    Each learner is one call to ``accuracy(data, M=M)`` on the module-level
    `data`; a learner counts as successful only when that accuracy equals 1.

    M   -- number of trials given to each learner.
    pop -- number of learners to simulate.
    """
    mastered = []
    for _ in xrange(pop):
        mastered.append(accuracy(data, M=M) == 1)
    return np.mean(mastered)
# Upper bound (exclusive) of the trial counts swept for every learning curve.
MAX_TRIALS = 500
# Learning curves keyed by number-system label; each value is a list of
# population success rates, one per trial count.
P = {}
# NOTE(review): xrange(1, MAX_TRIALS) stops at MAX_TRIALS - 1, so 499 trial
# counts are evaluated, not 500 — confirm this is intended.
P['sg / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]
import matplotlib.pyplot as plt
# The x axis is the trial count, starting at 1 to match the sweep above.
plt.plot(range(1,len(P['sg / pl'])+1), P['sg / pl'], '-', linewidth=2)
plt.title('Singular / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
<matplotlib.text.Text at 0x1789e780>
# Recode the outcome as a dual / non-dual contrast: every row starts as
# 'notdual' and the row labelled 2 becomes 'dual'.
data['Outcomes'] = 'notdual'
# Assign through .loc (row label, column name) so the write lands in `data`
# itself. Chained indexing (data['Outcomes'][2] = ...) assigns via an
# intermediate Series and pandas does not guarantee it reaches the frame.
data.loc[2, 'Outcomes'] = 'dual'
data
Cues | Outcomes | Frequency | Number | |
---|---|---|---|---|
1 | [size, shape, color, 1, exactly1] | notdual | 455 | 1 |
2 | [size, shape, color, 1, 2, exactly2] | dual | 205 | 2 |
3 | [size, shape, color, 1, 2, 3, exactly3] | notdual | 107 | 3 |
4 | [size, shape, color, 1, 2, 3, 4, exactly4] | notdual | 60 | 4 |
5 | [size, shape, color, 1, 2, 3, 4, 5, exactly5] | notdual | 50 | 5 |
6 | [size, shape, color, 1, 2, 3, 4, 5, 6, exactly6] | notdual | 36 | 6 |
7 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, exac... | notdual | 21 | 7 |
8 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, e... | notdual | 20 | 8 |
9 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 13 | 9 |
10 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 16 | 10 |
11 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 3 | 11 |
12 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 4 | 12 |
13 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 2 | 13 |
14 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 2 | 14 |
15 | [size, shape, color, 1, 2, 3, 4, 5, 6, 7, 8, 9... | notdual | 4 | 15 |
15 rows × 4 columns
# Learning curve for whatever labelling `data` currently holds: one
# population success rate per trial count from 1 up to MAX_TRIALS - 1.
P['du / non-du'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]
# The x axis is the trial count, starting at 1 to match the sweep above.
plt.plot(range(1,len(P['du / non-du'])+1), P['du / non-du'], '-', linewidth=2)
plt.title('Dual / non-dual distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
<matplotlib.text.Text at 0x178b8c88>
# Three-way system: rows labelled 1 and 2 get their own categories and every
# other row is 'plural'.
data['Outcomes'] = 'plural'
# .loc (row label, column name) writes into `data` itself. Chained indexing
# (data['Outcomes'][k] = ...) is not guaranteed by pandas to reach the frame.
data.loc[1, 'Outcomes'] = 'singular'
data.loc[2, 'Outcomes'] = 'dual'
# One population success rate per trial count from 1 up to MAX_TRIALS - 1.
P['sg / du / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]
plt.plot(range(1,len(P['sg / du / pl'])+1), P['sg / du / pl'], '-', linewidth=2)
plt.title('Singular / dual / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
<matplotlib.text.Text at 0x17af5fd0>
# Four-way system: rows labelled 1, 2 and 3 get their own categories and
# every other row is 'plural'.
data['Outcomes'] = 'plural'
# .loc (row label, column name) writes into `data` itself. Chained indexing
# (data['Outcomes'][k] = ...) is not guaranteed by pandas to reach the frame.
data.loc[1, 'Outcomes'] = 'singular'
data.loc[2, 'Outcomes'] = 'dual'
data.loc[3, 'Outcomes'] = 'trial'
# One population success rate per trial count from 1 up to MAX_TRIALS - 1.
P['sg / du / tr / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]
plt.plot(range(1,len(P['sg / du / tr / pl'])+1), P['sg / du / tr / pl'], '-', linewidth=2)
plt.title('Singular / dual / trial / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
<matplotlib.text.Text at 0x17b12668>
# Five-way system: rows labelled 1 to 4 get their own categories and every
# other row is 'plural'.
data['Outcomes'] = 'plural'
# .loc (row label, column name) writes into `data` itself. Chained indexing
# (data['Outcomes'][k] = ...) is not guaranteed by pandas to reach the frame.
data.loc[1, 'Outcomes'] = 'singular'
data.loc[2, 'Outcomes'] = 'dual'
data.loc[3, 'Outcomes'] = 'trial'
data.loc[4, 'Outcomes'] = '4ial'
# One population success rate per trial count from 1 up to MAX_TRIALS - 1.
P['sg / du / tr / qu / pl'] = [population_accuracy(M=i) for i in xrange(1,MAX_TRIALS)]
plt.plot(range(1,len(P['sg / du / tr / qu / pl'])+1), P['sg / du / tr / qu / pl'], '-', linewidth=2)
# The " / " between "quadral" and "plural" was missing; restored so the title
# follows the same pattern as the other number-system titles.
plt.title('Singular / dual / trial / quadral / plural distinction')
plt.xlabel('Trial Number')
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
<matplotlib.text.Text at 0x17e23c50>
One prediction is that the typology of number systems should roughly correspond to how learnable each type of system is.
# Overlay the learning curves of all five number systems on one set of axes,
# one labelled line per system.
for n in ('sg / pl', 'sg / du / pl', 'sg / du / tr / pl', 'du / non-du', 'sg / du / tr / qu / pl'):
    plt.plot(range(1, len(P[n]) + 1), P[n], linestyle='-', linewidth=1.5, label=n)
plt.suptitle('Proportion of 100 learners who label all 15 items correctly')
plt.xlabel('Trials')
# The negative x offset places the legend to the left of the axes.
plt.legend(loc=(-0.55, 0.5))
<matplotlib.legend.Legend at 0x17e436d8>