# The three discourse roles a referent can play relative to the speech act.
entities = {'addressee', 'other', 'self'}
There are 17 possible markers if there's no distinction between 2 entities of the same type and 3+ entities of the same type.
Also, what about the issue of mis-identifying the cue as "self" rather than "addressee" (kids calling themselves "you")?
from itertools import combinations, combinations_with_replacement

# Enumerate every admissible referent set, then extend each one with its
# compound cues (every sub-combination of two or more referents).
# Python 2 session: xrange throughout.
referents = []
for size in xrange(1, len(entities) * 2):
    for combo in combinations_with_replacement(entities, size):
        # Filter impossible / undistinguished sets:
        #   - "choral we" (two selves) is impossible,
        #   - only singular vs. plural is marked, so cap addressee/other at 2.
        if (combo.count('self') > 1
                or combo.count('addressee') > 2
                or combo.count('other') > 2):
            continue
        cue_set = list(combo)
        # Compound cues: each multi-referent subset, added at most once.
        for sub_size in xrange(2, len(combo) + 1):
            for compound in combinations(combo, sub_size):
                if compound not in cue_set:
                    cue_set.append(compound)
        referents.append(cue_set)
len(referents)
17
referents
[['addressee'], ['self'], ['other'], ['addressee', 'addressee', ('addressee', 'addressee')], ['addressee', 'self', ('addressee', 'self')], ['addressee', 'other', ('addressee', 'other')], ['self', 'other', ('self', 'other')], ['other', 'other', ('other', 'other')], ['addressee', 'addressee', 'self', ('addressee', 'addressee'), ('addressee', 'self'), ('addressee', 'addressee', 'self')], ['addressee', 'addressee', 'other', ('addressee', 'addressee'), ('addressee', 'other'), ('addressee', 'addressee', 'other')], ['addressee', 'self', 'other', ('addressee', 'self'), ('addressee', 'other'), ('self', 'other'), ('addressee', 'self', 'other')], ['addressee', 'other', 'other', ('addressee', 'other'), ('other', 'other'), ('addressee', 'other', 'other')], ['self', 'other', 'other', ('self', 'other'), ('other', 'other'), ('self', 'other', 'other')], ['addressee', 'addressee', 'self', 'other', ('addressee', 'addressee'), ('addressee', 'self'), ('addressee', 'other'), ('self', 'other'), ('addressee', 'addressee', 'self'), ('addressee', 'addressee', 'other'), ('addressee', 'self', 'other'), ('addressee', 'addressee', 'self', 'other')], ['addressee', 'addressee', 'other', 'other', ('addressee', 'addressee'), ('addressee', 'other'), ('other', 'other'), ('addressee', 'addressee', 'other'), ('addressee', 'other', 'other'), ('addressee', 'addressee', 'other', 'other')], ['addressee', 'self', 'other', 'other', ('addressee', 'self'), ('addressee', 'other'), ('self', 'other'), ('other', 'other'), ('addressee', 'self', 'other'), ('addressee', 'other', 'other'), ('self', 'other', 'other'), ('addressee', 'self', 'other', 'other')], ['addressee', 'addressee', 'self', 'other', 'other', ('addressee', 'addressee'), ('addressee', 'self'), ('addressee', 'other'), ('self', 'other'), ('other', 'other'), ('addressee', 'addressee', 'self'), ('addressee', 'addressee', 'other'), ('addressee', 'self', 'other'), ('addressee', 'other', 'other'), ('self', 'other', 'other'), ('addressee', 'addressee', 
'self', 'other'), ('addressee', 'addressee', 'other', 'other'), ('addressee', 'self', 'other', 'other'), ('addressee', 'addressee', 'self', 'other', 'other')]]
Spoken English collapses these to 6 possibilities: I, you, s/he, we, you guys, they
def english(referents):
    """Collapse a referent set to the spoken-English pronoun that marks it.

    Returns one of 'I', 'we', 'you', 'you guys', 's/he', 'they';
    falls through (implicitly None) when no known entity is present.
    """
    has_self = 'self' in referents
    has_addressee = 'addressee' in referents
    has_other = 'other' in referents
    # First person: any group containing the speaker is 'we',
    # regardless of who else is included.
    if has_self:
        return 'we' if (has_addressee or has_other) else 'I'
    # Second person, speaker excluded: plural when there are multiple
    # addressees (inclusive) or an extra third party (exclusive).
    if has_addressee:
        plural = referents.count('addressee') > 1 or has_other
        return 'you guys' if plural else 'you'
    # Third person, addressee excluded as well.
    if has_other:
        return 'they' if referents.count('other') > 1 else 's/he'
english(['self', 'addressee'])  # speaker + addressee -> inclusive 'we'
'we'
english(['self', 'other'])  # speaker + third party -> exclusive 'we'
'we'
english(['addressee', 'other'])  # no speaker, addressee + other -> exclusive plural 'you'
'you guys'
english(['addressee', 'addressee']) # also ('addressee', 'addressee') compound
'you guys'
import pandas

# One row per referent set: the cue list and the English pronoun it maps to.
data = pandas.DataFrame()
data['Cues'] = referents
data['Outcomes'] = [english(referent_set) for referent_set in referents]
data
Cues | Outcomes | |
---|---|---|
0 | [addressee] | you |
1 | [self] | I |
2 | [other] | s/he |
3 | [addressee, addressee, (addressee, addressee)] | you guys |
4 | [addressee, self, (addressee, self)] | we |
5 | [addressee, other, (addressee, other)] | you guys |
6 | [self, other, (self, other)] | we |
7 | [other, other, (other, other)] | they |
8 | [addressee, addressee, self, (addressee, addre... | we |
9 | [addressee, addressee, other, (addressee, addr... | you guys |
10 | [addressee, self, other, (addressee, self), (a... | we |
11 | [addressee, other, other, (addressee, other), ... | you guys |
12 | [self, other, other, (self, other), (other, ot... | we |
13 | [addressee, addressee, self, other, (addressee... | we |
14 | [addressee, addressee, other, other, (addresse... | you guys |
15 | [addressee, self, other, other, (addressee, se... | we |
16 | [addressee, addressee, self, other, other, (ad... | we |
17 rows × 2 columns
Assume that the distribution of referent sets is uniform — probably not true of real speech, but a reasonable starting point.
import numpy

def sampler(p):
    """Return a zero-argument function drawing a uniform random index in [0, p)."""
    return lambda: numpy.random.choice(p)
# Uniform sampler over the row indices of `data` (one per referent set).
referent_sampler = sampler(len(data))
import ndl  # project-local Rescorla-Wagner / naive discriminative learning module
def activation(W):
    """Outcome activations for every cue set in `data`, given weight matrix W.

    Returns a DataFrame with one row per referent set, indexed like `data`.
    """
    rows = [ndl.activation(cue_set, W) for cue_set in data.Cues]
    return pandas.DataFrame(rows, index=data.index)
# Train Rescorla-Wagner weights for 100 trials, sampling referent sets uniformly,
# then compute the resulting outcome activations.
W = ndl.rw(data, M=100, distribution=referent_sampler)
A = activation(W)
A
I | s/he | they | we | you | you guys | |
---|---|---|---|---|---|---|
0 | -0.000848 | -0.004426 | -0.010796 | 0.094586 | 0.051966 | 0.115364 |
1 | 0.038021 | -0.004950 | -0.012859 | 0.244965 | -0.003339 | -0.044676 |
2 | -0.000927 | 0.055316 | 0.066743 | 0.114067 | -0.002913 | 0.074110 |
3 | -0.001350 | -0.006548 | -0.015506 | 0.147016 | 0.048047 | 0.193614 |
4 | 0.036280 | -0.010656 | -0.027279 | 0.482914 | 0.044615 | 0.028538 |
5 | -0.002198 | 0.044708 | 0.041887 | 0.229084 | 0.045385 | 0.275810 |
6 | 0.036122 | 0.043941 | 0.038201 | 0.528713 | -0.007516 | -0.007599 |
7 | -0.001687 | 0.048333 | 0.142420 | 0.160588 | -0.004411 | 0.100827 |
8 | 0.035268 | -0.013435 | -0.033228 | 0.616866 | 0.038504 | 0.078863 |
9 | -0.002760 | 0.039354 | 0.030369 | 0.304269 | 0.039766 | 0.394813 |
10 | 0.033516 | 0.029490 | 0.003721 | 0.845571 | 0.034954 | 0.119107 |
11 | -0.003384 | 0.035624 | 0.107613 | 0.277364 | 0.041903 | 0.337897 |
12 | 0.034578 | 0.034353 | 0.102137 | 0.654354 | -0.009695 | -0.001124 |
13 | 0.032382 | 0.021968 | -0.011912 | 1.048176 | 0.026274 | 0.188487 |
14 | -0.004007 | 0.029325 | 0.091090 | 0.359332 | 0.035411 | 0.474638 |
15 | 0.031102 | 0.017140 | 0.053163 | 0.995321 | 0.029755 | 0.143911 |
16 | 0.029846 | 0.008438 | 0.030560 | 1.225235 | 0.019831 | 0.220925 |
17 rows × 6 columns
# Side-by-side view: the true pronoun, the model's argmax prediction,
# and whether the two agree, one row per referent set.
predictions = A.idxmax(1)
pandas.DataFrame([data['Outcomes'], predictions, predictions == data['Outcomes']],
                 index=['Truth', 'Prediction', 'Accurate?']).T
Truth | Prediction | Accurate? | |
---|---|---|---|
0 | you | you guys | False |
1 | I | we | False |
2 | s/he | we | False |
3 | you guys | you guys | True |
4 | we | we | True |
5 | you guys | you guys | True |
6 | we | we | True |
7 | they | we | False |
8 | we | we | True |
9 | you guys | you guys | True |
10 | we | we | True |
11 | you guys | you guys | True |
12 | we | we | True |
13 | we | we | True |
14 | you guys | you guys | True |
15 | we | we | True |
16 | we | we | True |
17 rows × 3 columns
With only 100 trials, the learner already gets most referent sets right — but largely by defaulting to 'we' whenever self is a referent and 'you guys' otherwise, since those two outcomes cover most of the referent sets.
import sim  # project-local simulation harness
# Simulate learning the English pronoun mapping over 2000 trials.
english_learning = sim.Simulation(english, data, referent_sampler, 2000)
import matplotlib.pyplot as plt
%matplotlib inline
# Accuracy measured after each trial (Python 2: xrange).
trajectory = [english_learning.accuracy(i) for i in xrange(1, english_learning.MAX_M)]
plt.plot(range(1, len(trajectory) + 1), trajectory, '-')
plt.xlabel('Trial Number')
<matplotlib.text.Text at 0xf6fb198>
%load_ext rpy2.ipython
%Rpush trajectory
%%R
# Rebuild the pushed vector as a data.frame so ggplot can map trial -> accuracy.
trajectory = data.frame(trial=1:length(trajectory), learned=trajectory)
library('ggplot2')
# Per-trial accuracy with a smoothed trend, y-axis clamped to [0, 1].
ggplot(trajectory, aes(trial, learned)) +
geom_point(alpha=0.25) +
stat_smooth() +
coord_cartesian(ylim=c(0,1))