0. Loading the data¶

The data was exported from an Excel file, in Open Office, with "Save As"->"Text CSV", as "UTF-8", "{Tabulator}" as field seperator and double quotes (") as text seperator.

In [1]:

%load_ext autoreload
%autoreload 2

In [2]:

import helpers.diana

skip_lines = [
    "Satire_and_humor_01_Two travellers",
    "Satire_and_humor_02_The king's dispute",
    "Satire_and_humor_03_King and slave",
    "Satire_and_humor_04_Herdsman and Shamkhal",
    "Satire_and_humor_05_Bratak and the rich",
    "Satire_and_humor_06_Shephard and khan",
    "Satire_and_humor_07_Bolo and the old woman",
    "Satire_and_humor_08_King and fool",
    "Satire_and_humor_09_Shax Abbas and the widower's son",
    "Satire_and_humor_10_Shakh Abbas and the mother",
    "Satire_and_humor_11_Shakh Abbas and the man",
    "Satire_and_humor_12_Master and boat",
    "Satire_and_humor_13_Craft of the smith",
    "Satire_and_humor_14_Mulla and suslik",
    "Satire_and_humor_15_Mullah and the poor",
    "Satire_and_humor_16_Mention me in the prayer",
    "Satire_and_humor_17_Big alkham",
    "Satire_and_humor_18_Chief and burried",
    "Satire_and_humor_19_Khinkal in paradise",
    "Satire_and_humor_20_The horse's pace",
    "Satire_and_humor_21_Cat",
    "Satire_and_humor_22_Man_bought_kuvshin",
    "Satire_and_humor_23_Crying for the son",
    "Satire_and_humor_24_The escaping hare",
    "Satire_and_humor_25_broken",
    "Satire_and_humor_26_reversed legs",
    "Satire_and_humor_27_friendship with the snow",
    "Satire_and_humor_28_Man and bird",
    "nice reflexive, but not clear whether the second one is in a relative clause or in the main clause",
    "looks like reflexive in a postpositional phrase"
]
tier_numbers = {
    "clause_id": 5,
    "clause_type": 6,
    "grammatical_relation": 7,
    "pos_agreement": 9,
    "last_line": 11
}
ag = helpers.diana.from_excel("data/AvarAnnotation.csv", skip_lines=skip_lines, tier_numbers=tier_numbers)

Error: duplicate clause ID: #3
Error: duplicate clause ID: #627
Error: duplicate clause ID: #628

1. Linear order¶

In [3]:

import collections

verbs = [ 'COP', 'SAY', 'v.tr', 'v.intr', 'v.aff' ]
verb_map = { v: "V" for v in verbs}
others = [ 'A', 'S', 'P', 'EXP', 'STIM', 'zero-A', 'zero-S', 'zero-P', 'zero-EXP', 'zero-STIM' ]
search_terms = verbs + others

word_orders = collections.defaultdict(int)
word_orders_ids = collections.defaultdict(list)

for wo in helpers.diana.word_orders(ag, search_terms):
    word_orders[tuple(wo.word_order)] += 1
    word_orders_ids[tuple(wo.word_order)].append(wo.clause_id)

for word_order, count in word_orders.items():
    print("{0} => {1}".format(word_order, count))
    if count < 5:
        print("    {1}".format(word_order, word_orders_ids[word_order]))

('S', 'v.intr') => 172
('SAY', 'A') => 50
('EXP', 'zero-STIM', 'v.aff') => 4
    ['clause_id..n#78', 'clause_id..n#611', 'clause_id..n#64', 'clause_id..n#57']
('zero-EXP', 'STIM', 'v.aff') => 10
('zero-EXP', 'zero-STIM', 'v.aff') => 2
    ['clause_id..n#706', 'clause_id..n#295']
('zero-S', 'v.intr') => 125
('A', 'SAY') => 61
('EXP', 'STIM', 'v.aff') => 13
('v.aff', 'EXP', 'STIM') => 1
    ['clause_id..n#270']
('STIM', 'v.aff', 'EXP') => 3
    ['clause_id..n#97', 'clause_id..n#98', 'clause_id..n#417']
('v.intr', 'S') => 34
('zero-S', 'COP') => 20
('v.tr', 'P', 'A') => 1
    ['clause_id..n#443']
('SAY', 'zero-A') => 2
    ['clause_id..n#244', 'clause_id..n#688']
('zero-A', 'v.tr', 'zero-P') => 1
    ['clause_id..n#120']
('zero-A', 'P', 'v.tr') => 134
('STIM', 'EXP', 'v.aff') => 4
    ['clause_id..n#359', 'clause_id..n#99', 'clause_id..n#52', 'clause_id..n#997']
('zero-P', 'v.tr', 'A') => 3
    ['clause_id..n#880', 'clause_id..n#236', 'clause_id..n#274']
('A', 'P', 'v.tr') => 85
('P', 'v.tr', 'A') => 16
('P', 'v.tr', 'zero-A') => 1
    ['clause_id..n#6']
('S', 'COP') => 101
('v.tr', 'A', 'P') => 10
('A', 'zero-P', 'v.tr') => 17
('P', 'zero-A', 'v.tr') => 1
    ['clause_id..n#185']
('zero-A', 'SAY') => 27
('A', 'v.tr', 'P') => 12
('zero-A', 'zero-P', 'v.tr') => 41
('COP', 'S') => 15
('zero-A', 'v.tr', 'P') => 14
('P', 'A', 'v.tr') => 13
('EXP', 'v.aff', 'STIM') => 6

1.1. Are subordinate clauses significantly more often verb-final than main clauses?¶

In [4]:

word_orders_main = []
word_orders_main_count = collections.defaultdict(int)
word_orders_sub = []
word_orders_sub_count = collections.defaultdict(int)
main_clause_types = [ "m", "m.rs" ]
sub_clause_types = [ "sub", "sub.rs" ]
clause_types = main_clause_types + sub_clause_types
search_terms = verbs + ['A', 'S', 'P', 'EXP', 'STIM']

for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    if "V" in wo.word_order and wo.clause_type in clause_types and len(wo.word_order) > 1:
        if wo.clause_type in sub_clause_types:
            word_orders_sub.append(wo.word_order)
            word_orders_sub_count[tuple(wo.word_order)] += 1
        else:
            word_orders_main.append(wo.word_order)
            word_orders_main_count[tuple(wo.word_order)] += 1 

Statistical test¶

Hypothesis H0: It does not depend on the clause type (sub vs. main) whether the clause unit is verb final.

In [6]:

main_v_fin = 0; main_v_nonfin = 0; sub_v_fin = 0; sub_v_nonfin = 0;
for wo, c in word_orders_main_count.items():
    if wo[-1] == "V":
        main_v_fin += c
    else:
        main_v_nonfin += c
for wo, c in word_orders_sub_count.items():
    if wo[-1] == "V":
        sub_v_fin += c
    else:
        sub_v_nonfin += c
cont_table = [ [main_v_fin, main_v_nonfin], [sub_v_fin, sub_v_nonfin] ]
cont_table

Out[6]:

[[454, 149], [160, 15]]

In [7]:

import scipy.stats
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[7]:

1.0194909237201304e-06

We reject the null hypothesis, as the chances of getting a distribution as the observed one are p < 0.05. The clause type affects the verb "finalness". In this case subordinate clauses have a sifgnificant higher count of verb final word orders.

1.2. How differ main clauses and subordinate clauses in their word orders?¶

Here are are the basic counts:

In [8]:

print("Counts for main clauses:")
for wo, c in word_orders_main_count.items():
    print("{0} => {1}".format(wo, c))
print("\nCounts for sub clauses:")
for wo, c in word_orders_sub_count.items():
    print("{0} => {1}".format(wo, c))

Counts for main clauses:
('V', 'A') => 51
('V', 'P') => 12
('STIM', 'EXP', 'V') => 3
('V', 'P', 'A') => 1
('STIM', 'V') => 7
('P', 'V', 'A') => 14
('S', 'V') => 215
('EXP', 'V') => 4
('A', 'V') => 70
('A', 'V', 'P') => 10
('EXP', 'STIM', 'V') => 11
('V', 'S') => 42
('V', 'A', 'P') => 10
('P', 'A', 'V') => 9
('EXP', 'V', 'STIM') => 5
('A', 'P', 'V') => 73
('P', 'V') => 62
('V', 'EXP', 'STIM') => 1
('STIM', 'V', 'EXP') => 3

Counts for sub clauses:
('V', 'A') => 1
('V', 'P') => 2
('STIM', 'EXP', 'V') => 1
('P', 'V', 'A') => 2
('STIM', 'V') => 3
('A', 'P', 'V') => 11
('S', 'V') => 57
('A', 'V') => 8
('EXP', 'STIM', 'V') => 2
('V', 'S') => 7
('EXP', 'V', 'STIM') => 1
('A', 'V', 'P') => 2
('P', 'V') => 74
('P', 'A', 'V') => 4

1.3. Where are G, BEN, G, TIME, LOC, ADD usually positioned? (e.g. before or after the verb)¶

In [9]:

particles = [ 'G', 'BEN', 'TIME', 'LOC', 'ADD' ]
pos_counts = [ [0, 0] for _ in particles ]
search_terms = verbs + particles

for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1
                
for i, p in enumerate(particles):
    print(p)
    print("    Count after verb:  {0}".format(pos_counts[i][0]))
    print("    Count before verb: {0}".format(pos_counts[i][1]))

G
    Count after verb:  25
    Count before verb: 155
BEN
    Count after verb:  9
    Count before verb: 39
TIME
    Count after verb:  5
    Count before verb: 64
LOC
    Count after verb:  12
    Count before verb: 55
ADD
    Count after verb:  22
    Count before verb: 22

1.4. In main clauses: are BEN arguments significantly more often positioned before the verb than after the verb?¶

In [10]:

particles = [ 'BEN', 'G', 'ADD' ]
pos_counts = [ [0, 0] for _ in particles ]
search_terms = verbs + particles

before = 0
after = 0
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if wo.clause_type in main_clause_types and "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1

for i, p in enumerate(particles):
    print(p)
    print("    Count after verb:  {0}".format(pos_counts[i][0]))
    print("    Count before verb: {0}".format(pos_counts[i][1]))

BEN
    Count after verb:  8
    Count before verb: 31
G
    Count after verb:  23
    Count before verb: 90
ADD
    Count after verb:  20
    Count before verb: 20

Statistical test¶

I am using a binomial test here: http://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binom_test.html#scipy.stats.binom_test

In [11]:

part_sum = [ 0, 0 ]
for i, p in enumerate(particles):
    part_sum[0] += pos_counts[i][0]
    part_sum[1] += pos_counts[i][1]
    print("Test for '{0}'".format(p))
    print(scipy.stats.binom_test(pos_counts[i]))
print("Test for 'BEN+G+ADD'")
print(scipy.stats.binom_test(part_sum))

Test for 'BEN'
0.000294076875434
Test for 'G'
1.48849064758e-10
Test for 'ADD'
1.0
Test for 'BEN+G+ADD'
5.94769311956e-11

Except for "ADD" it is very unlikely that those counts are random. So the difference for "BEN" and "G" and "BEN+G+ADD" is significant.

1.5. In main and sub clauses: Are BEN + G + ADD significantly more often occurring after the verb than A or P or LOC, A + P or A + P + LOC?¶

In [12]:

particles = [ 'BEN', 'G', 'ADD', 'A', 'P', 'LOC' ]
pos_counts = [ [0, 0] for _ in particles ]
search_terms = verbs + particles

before = 0
after = 0
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1

Hypothesis H0: It does not depend on the grammatical relation type if a participant appears before or after the verb.

For the test we use the Fisher exact test, as this test also works for small numbers (http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.stats.fisher_exact.html).

BEN + G + ADD vs. A¶

In [13]:

BEN_G_ADD = [ 0, 0 ]
BEN_G_ADD[0] = pos_counts[0][0] + pos_counts[1][0] + pos_counts[2][0]
BEN_G_ADD[1] = pos_counts[0][1] + pos_counts[1][1] + pos_counts[2][1]
cont_table = [ BEN_G_ADD, pos_counts[3] ]
cont_table

Out[13]:

[[56, 216], [80, 188]]

In [14]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[14]:

0.013473954825883712

We reject the null hpythesis H0 because p < 0.05. The grammatical relation type does have influence on whether a participant appears before or after the verb. In this case, A appears significantly more often before the verb then BEN + G + ADD.

BEN + G + ADD vs. P¶

In [15]:

cont_table = [ BEN_G_ADD, pos_counts[4] ]
cont_table

Out[15]:

[[56, 216], [37, 250]]

In [16]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[16]:

0.016904389278624213

Again, we reject the null hypothesis because p < 0.05. P occurs more often before the verb then BEN + G + ADD.

BEN + G + ADD vs. LOC¶

In [17]:

cont_table = [ BEN_G_ADD, pos_counts[5] ]
cont_table

Out[17]:

[[56, 216], [12, 55]]

In [18]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[18]:

0.73423084920817272

We cannot reject the null hypothesis because p > 0.05. LOC does not occurs more often before the verb then BEN + G + ADD.

BEN + G + ADD vs. A + P¶

In [19]:

A_P = [ 0, 0 ]
A_P[0] = pos_counts[3][0] + pos_counts[4][0]
A_P[1] = pos_counts[3][1] + pos_counts[4][1]
cont_table = [ BEN_G_ADD, A_P ]
cont_table

Out[19]:

[[56, 216], [117, 438]]

In [20]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[20]:

0.92756787940652896

We cannot reject H0. There is no difference between BEN + G + ADD vs. A + P.

BEN + G + ADD vs. A + P + LOC¶

In [21]:

A_P_LOC = [ 0, 0 ]
A_P_LOC[0] = pos_counts[3][0] + pos_counts[4][0] + pos_counts[5][0]
A_P_LOC[1] = pos_counts[3][1] + pos_counts[4][1] + pos_counts[5][1]
cont_table = [ BEN_G_ADD, A_P_LOC ]
cont_table

Out[21]:

[[56, 216], [129, 493]]

In [22]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[22]:

1.0

We cannot reject H0. There is no difference between BEN + G + ADD vs. A + P + LOC

1.5.1. In main clauses only: BEN vs. P, ADD vs. P, BEN + ADD vs. P, A vs. P, A vs. S¶

In [23]:

particles = [ 'BEN', 'G', 'ADD', 'A', 'P', 'LOC', 'S' ]
pos_counts = [ [0, 0] for _ in particles ]
search_terms = verbs + particles

before = 0
after = 0
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if wo.clause_type in main_clause_types and "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1

BEN vs. P¶

In [24]:

cont_table = [ pos_counts[0], pos_counts[4] ]
cont_table

Out[24]:

[[8, 31], [33, 158]]

In [25]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[25]:

0.64794235656672949

We cannot reject H0. There is no significant difference between BEN and P if they appear before or after the verb.

ADD vs. P¶

In [26]:

cont_table = [ pos_counts[2], pos_counts[4] ]
cont_table

Out[26]:

[[20, 20], [33, 158]]

In [27]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[27]:

3.8451757980430857e-05

We can reject H0. P occurs more often before the verb then ADD.

BEN + ADD vs. P¶

In [28]:

BEN_ADD = [ 0, 0 ]
BEN_ADD[0] = pos_counts[0][0] + pos_counts[2][0]
BEN_ADD[1] = pos_counts[0][1] + pos_counts[2][1]
cont_table = [ BEN_ADD, pos_counts[4] ]
cont_table

Out[28]:

[[28, 51], [33, 158]]

In [29]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[29]:

0.0021392471158436389

We reject reject H0 as p < 0.05. P appears significantly more often before the verb then BEN + ADD.

A vs. P¶

In [30]:

cont_table = [ pos_counts[3], pos_counts[4] ]
cont_table

Out[30]:

[[76, 162], [33, 158]]

In [31]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[31]:

0.00053348950732782618

We reject H0 as p < 0.05. P apppears significantly more often before the verb than P.

A vs. S¶

In [32]:

cont_table = [ pos_counts[3], pos_counts[5] ]
cont_table

Out[32]:

[[76, 162], [9, 36]]

In [33]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[33]:

0.15502514539950046

We cannot reject H0. There is no significant difference between A and S if they appear before or after the verb.

1.6. SAY: Does SAY have more often an overtly expressed A than all other transitive verbs?¶

Hypothesis 1 (H0): It does not depend on the type of the verb (SAY vs. others) if the A is expressed ouvertly.

For the test we use the Fisher exact test, as this test also works for small numbers (http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.stats.fisher_exact.html).

In [34]:

other_verbs = [ 'COP', 'v.tr', 'v.intr', 'v.aff' ]
search_terms = other_verbs + [ 'SAY', 'A', 'zero-A' ]
verb_map = { v: "V" for v in other_verbs }
SAY_counts = [ 0, 0 ]
others_counts = [ 0, 0 ]
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    if 'SAY' in wo.word_order:
        if 'A' in wo.word_order:
            SAY_counts[0] += 1
        elif 'zero-A' in wo.word_order:
            SAY_counts[1] += 1
    if 'V' in wo.word_order:
        if 'A' in wo.word_order:
            others_counts[0] += 1
        elif 'zero-A' in wo.word_order:
            others_counts[1] += 1

In [35]:

cont_table = [ SAY_counts, others_counts ]
cont_table

Out[35]:

[[111, 29], [157, 192]]

In [36]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[36]:

2.0383277596699758e-12

We reject the null hypothesis because p < 0.05. A is more often ouvert in SAY sentences than in any other sentence with other verb types.

1.7. Does the A of say more often follow its verb than the A of all other transitive verbs?¶

H0: It does not depend on the verb type (A vs. others) if A is before or after the verb.

In [37]:

search_terms = other_verbs + [ 'SAY', 'A' ]
SAY_counts = [ 0, 0 ]
others_counts = [ 0, 0 ]
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    if 'SAY' in wo.word_order and 'A' in wo.word_order:
        if wo.word_order.index("SAY") < wo.word_order.index("A"):
            SAY_counts[0] += 1
        else:
            SAY_counts[1] += 1
    if "V" in wo.word_order and "A" in wo.word_order:
        if wo.word_order.index("V") < wo.word_order.index("A"):
            others_counts[0] += 1
        else:
            others_counts[1] += 1

In [38]:

cont_table = [ SAY_counts, others_counts ]
cont_table

Out[38]:

[[50, 61], [30, 127]]

In [39]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[39]:

6.633759085937411e-06

We reject the null hypothesis as p < 0.05. A does significantly more often preceed the verb in non-SAY sentences.

1.8. Does the overt A of SAY more often precede or follow the verb (and what are the frequencies)?¶

In [40]:

print("A after SAY: {0}".format(SAY_counts[0]))
print("A before SAY: {0}".format(SAY_counts[1]))

A after SAY: 50
A before SAY: 61

2. Agreement¶

2.1. How frequent is agreement in texts (in main and subordinate clauses)?¶

In [41]:

verbs = [ 'v.tr', 'v.intr', 'v.aff' ]
agreements = [0, 0]; noagreements = [0, 0];
for wo in helpers.diana.word_orders(ag, verbs, with_agreement = True):
    for agr in wo.agreement:
        if wo.clause_type in sub_clause_types:
            if agr == "noagr":
                noagreements[0] += 1
            else:
                agreements[0] += 1
        else:
            if agr == "noagr":
                noagreements[1] += 1
            else:
                agreements[1] += 1

print("I found {0} verbs with and {1} verbs without agreement.".format(agreements[0]+agreements[1], noagreements[0]+noagreements[1]))
print("In main clauses: I found {0} verbs with and {1} verbs without agreement.".format(agreements[1], noagreements[1]))
print("In sub clauses: I found {0} verbs with and {1} verbs without agreement.".format(agreements[0], noagreements[0]))

no agreement annotation in clause unit clause_id..n#737 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#718 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#679 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#645 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#465 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#370 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#377 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#268 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#274 for grammatical relation 'v.tr'
I found 397 verbs with and 314 verbs without agreement.
In main clauses: I found 280 verbs with and 191 verbs without agreement.
In sub clauses: I found 117 verbs with and 123 verbs without agreement.

2.2. Do non-agreeing verbs occur more often with overtly expressed S or P or STIM arguments than agreeing verbs (because the agreement prefixes are enough to track the reference)?¶

Hypothesis H0: It does not depend on the verbal agreement if S (or P or STIM) arguments are expressed overtly.

In [42]:

search_terms = verbs + [ 'S', 'P', 'STIM',  'zero-S', 'zero-P', 'zero-STIM' ]
cont_table_S = [ [0, 0], [0, 0] ]
cont_table_P = [ [0, 0], [0, 0] ]
cont_table_STIM = [ [0, 0], [0, 0] ]
cont_table_S_P_STIM =  [ [0, 0], [0, 0] ]
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement = True):
    agreeing = 0
    if len(wo.word_order) != len(wo.agreement):
        continue
    for i, w in enumerate(wo.word_order):
        if w in verbs:
            if wo.agreement[i] == "noagr":
                agreeing = 1
    if "zero-S" in wo.word_order:
        cont_table_S[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "S" in wo.word_order:
        cont_table_S[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1
    if "zero-P" in wo.word_order:
        cont_table_P[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "P" in wo.word_order:
        cont_table_P[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1
    if "zero-STIM" in wo.word_order:
        cont_table_STIM[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "STIM" in wo.word_order:
        cont_table_STIM[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1

no agreement annotation in clause unit clause_id..n#47 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#737 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#718 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#679 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#645 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#275 for grammatical relation 'P'
no agreement annotation in clause unit clause_id..n#465 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#370 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#377 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#268 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#274 for grammatical relation 'v.tr'

Statistical test for S¶

In [43]:

cont_table_S

Out[43]:

[[81, 216], [64, 101]]

In [44]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table_S)
pvalue

Out[44]:

0.012097485903857692

We reject the null hypothesis, because p < 0.05. S is signifantly more often overt when there is agreement.

Statistical test for P¶

In [45]:

cont_table_P

Out[45]:

[[38, 164], [22, 116]]

In [46]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table_P)
pvalue

Out[46]:

0.56320828600188433

We cannot reject H0 as p > 0.05. There is no influence of agreement on overt P (or vice versa).

Staistical test for STIM¶

In [47]:

cont_table_STIM

Out[47]:

[[5, 27], [1, 10]]

In [48]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table_STIM)
pvalue

Out[48]:

1.0

Here it is not possible to calculate, as the number of non-agreements is too low. Practically all verbs that have a STIM argument show agreement, whether the argument is expressed or not.

Statistical test for S + P + STIM¶

In [49]:

cont_table_S_P_STIM

Out[49]:

[[124, 407], [87, 227]]

In [50]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table_S_P_STIM)
pvalue

Out[50]:

0.16283038815723336

We cannot reject the hypothesis as p > 0.05. It does not depend on the verbal agreement whether S + P + STIM is expressed overtly.

2.3. Differences between the prefixes or not?¶

2.3.1. How often do the prefixes occur at all in texts¶

In [51]:

verbs = [ 'v.tr', 'v.intr', 'v.aff' ]
agreement_sum = collections.defaultdict(int)
for wo in helpers.diana.word_orders(ag, verbs, with_agreement = True):
    for agr in wo.agreement:
        agreement_sum[agr] += 1
for agr, count in agreement_sum.items():
    print("{} => {}".format(agr, count))

no agreement annotation in clause unit clause_id..n#737 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#718 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#679 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#645 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#465 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#370 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#377 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#268 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#274 for grammatical relation 'v.tr'
b+b-3 => 8
v.tr => 1
w+w-1 => 10
y-1 => 1
l-pl => 1
r-pl => 37
l+r-pl => 1
y+y-2 => 2
y-2 => 4
w-1 => 107
b-3 => 223
r+l-pl => 1
noagrt => 1
noagr => 314

2.3.2. How often do the prefixes occur with overt arguments¶

We check here first, if the agreement makes sense, i.e. whether the class marker on P, S or STIM is the same as on the verb. All other cases are printed with clause ID, to check manually.

In [52]:

verbs = [ 'v.tr', 'v.intr', 'v.aff' ]
search_terms = verbs + [ 'S', 'P', 'STIM', 'zero-S', 'zero-P', 'zero-STIM' ]
agreements = collections.defaultdict(int)
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement = True):
    v_class = None; n_class = None; v_marker = None; n_type = None;
    agreement = False
    zero = False
    if len(wo.word_order) != len(wo.agreement):
        print("length on blue and yellow line different in ID {}".format(wo.clause_id))
        continue
    for i, w in enumerate(wo.word_order):
        if w in verbs:
            if wo.agreement[i] != "noagr":
                agreement = True
                if "-" in wo.agreement[i]:
                    v_marker, v_class = wo.agreement[i].split("-")
                else:
                    print("no dash in v agr in ID {}".format(wo.clause_id))
        else:
            if "-" in wo.agreement[i]:
                n_split = wo.agreement[i].split("-")
                if len(n_split) > 2:
                    print("more than one dash in n agr in ID {}".format(wo.clause_id))
                n_class = n_split[1]
                #n_type = n_split[1]
                if "." in n_class:
                    n_class, _ = n_class.split(".")
            else:
                print("no dash in n agr in ID {}".format(wo.clause_id))
            if w.startswith("zero-"):
                zero = True
                
    if v_class != n_class and agreement:
        print("n class does not equal v class in ID {} (n_class: {} vs. v_class: {})".format(wo.clause_id, n_class, v_class))
    elif v_class is not None and n_class is not None and not zero:
            agreements["{}-{}".format(v_marker, v_class)] += 1

no dash in n agr in ID clause_id..n#2
no agreement annotation in clause unit clause_id..n#47 for grammatical relation 'S'
length on blue and yellow line different in ID clause_id..n#47
n class does not equal v class in ID clause_id..n#978 (n_class: 2 vs. v_class: 1)
no agreement annotation in clause unit clause_id..n#737 for grammatical relation 'v.intr'
length on blue and yellow line different in ID clause_id..n#737
no agreement annotation in clause unit clause_id..n#718 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#718
no agreement annotation in clause unit clause_id..n#679 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#679
no dash in v agr in ID clause_id..n#373
n class does not equal v class in ID clause_id..n#373 (n_class: 3 vs. v_class: None)
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'v.intr'
length on blue and yellow line different in ID clause_id..n#131
no agreement annotation in clause unit clause_id..n#645 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#645
no agreement annotation in clause unit clause_id..n#275 for grammatical relation 'P'
length on blue and yellow line different in ID clause_id..n#275
no agreement annotation in clause unit clause_id..n#465 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#465
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'v.intr'
length on blue and yellow line different in ID clause_id..n#666
no agreement annotation in clause unit clause_id..n#370 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#370
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'S'
length on blue and yellow line different in ID clause_id..n#762
no agreement annotation in clause unit clause_id..n#377 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#377
no agreement annotation in clause unit clause_id..n#268 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#268
n class does not equal v class in ID clause_id..n# (n_class: pl vs. v_class: 3)
no dash in v agr in ID clause_id..n#716
n class does not equal v class in ID clause_id..n#716 (n_class: 3 vs. v_class: None)
no agreement annotation in clause unit clause_id..n#274 for grammatical relation 'v.tr'
length on blue and yellow line different in ID clause_id..n#274

Here are the counts for all overt arguments where class markers were equal:

In [53]:

for agr, count in agreements.items():
    print("{} => {}".format(agr, count))

l+r-pl => 1
w+w-1 => 6
y-2 => 4
w-1 => 54
b-3 => 184
b+b-3 => 6
l-pl => 1
r-pl => 30
y+y-2 => 2

3. Referential density and distance¶

3.1. What is the referential density for COP, v.intr, v.tr and v.aff¶

COP¶

In [54]:

search_terms = [ "COP", "S" ]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "COP" in wo.word_order:
        possible += 1
        if "S" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt)/possible))

116 / 136 = 0.8529411764705882

v.intr¶

In [55]:

search_terms = [ "v.intr", "S" ]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.intr" in wo.word_order:
        possible += 1
        if "S" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt)/possible))

206 / 331 = 0.622356495468278

v.tr¶

In [56]:

search_terms = [ "v.tr", "A", "P" ]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.tr" in wo.word_order:
        possible += 2
        if "A" in wo.word_order:
            overt += 1
        if "P" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt)/possible))

444 / 698 = 0.6361031518624641

v.aff¶

In [57]:

search_terms = [ "v.aff", "EXP", "STIM" ]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.aff" in wo.word_order:
        possible += 2
        if "EXP" in wo.word_order:
            overt += 1
        if "STIM" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt)/possible))

68 / 86 = 0.7906976744186046

3.2. Is the referential density for agreeing X higher than for non-agreeing X?¶

v.intr¶

In [58]:

search_terms = [ "v.intr", "S" ]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement = True):
    if len(wo.word_order) != len(wo.agreement):
        continue
    if "v.intr" in wo.word_order:
        v_index = wo.word_order.index("v.intr")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 1
            if "S" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 1
            if "S" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(agr_overt, agr_possible, float(agr_overt)/agr_possible))
print("without agreement: {} / {} = {}".format(noagr_overt, noagr_possible, float(noagr_overt)/noagr_possible))

no agreement annotation in clause unit clause_id..n#47 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#737 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#131 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'S'
no agreement annotation in clause unit clause_id..n#666 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'v.intr'
no agreement annotation in clause unit clause_id..n#762 for grammatical relation 'S'
with agreement: 100 / 161 = 0.6211180124223602
without agreement: 101 / 165 = 0.6121212121212121

Statistical test¶

Hypthesis H0: It does not depend on agreement whether S in v.intr sentences is expressed overtly?

In [59]:

cont_table = [ [agr_overt, agr_possible-agr_overt], [noagr_overt, noagr_possible-noagr_overt] ]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

0.909500375789

We cannot reject H0 as p > 0.05. It does not depent on agreement wether the argument is expressed overtly.

v.tr¶

In [60]:

search_terms = [ "v.tr", "A", "P" ]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement = True):
    if len(wo.word_order) != len(wo.agreement):
        continue
    if "v.tr" in wo.word_order:
        v_index = wo.word_order.index("v.tr")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 2
            if "A" in wo.word_order:
                noagr_overt += 1
            if "P" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 2
            if "A" in wo.word_order:
                agr_overt += 1
            if "P" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(agr_overt, agr_possible, float(agr_overt)/agr_possible))
print("without agreement: {} / {} = {}".format(noagr_overt, noagr_possible, float(noagr_overt)/noagr_possible))

no agreement annotation in clause unit clause_id..n#718 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#679 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#169 for grammatical relation 'A'
no agreement annotation in clause unit clause_id..n#177 for grammatical relation 'A'
no agreement annotation in clause unit clause_id..n#501 for grammatical relation 'A'
no agreement annotation in clause unit clause_id..n#645 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#275 for grammatical relation 'P'
no agreement annotation in clause unit clause_id..n#465 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#370 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#377 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#268 for grammatical relation 'v.tr'
no agreement annotation in clause unit clause_id..n#274 for grammatical relation 'v.tr'
with agreement: 257 / 404 = 0.6361386138613861
without agreement: 172 / 270 = 0.6370370370370371

Statistical test¶

Hypthesis H0: It does not depend on agreement whether S in v.intr sentences is expressed overtly?

In [61]:

cont_table = [ [agr_overt, agr_possible-agr_overt], [noagr_overt, noagr_possible-noagr_overt] ]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

1.0

We cannot reject H0 as p > 0.05. It does not depent on agreement wether the argument is expressed overtly.

v.aff¶

In [62]:

search_terms = [ "v.aff", "EXP", "STIM" ]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement = True):
    if len(wo.word_order) != len(wo.agreement):
        continue
    if "v.aff" in wo.word_order:
        v_index = wo.word_order.index("v.aff")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 2
            if "EXP" in wo.word_order:
                noagr_overt += 1
            if "STIM" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 2
            if "EXP" in wo.word_order:
                agr_overt += 1
            if "STIM" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(agr_overt, agr_possible, float(agr_overt)/agr_possible))
print("without agreement: {} / {} = {}".format(noagr_overt, noagr_possible, float(noagr_overt)/noagr_possible))

with agreement: 49 / 64 = 0.765625
without agreement: 19 / 22 = 0.8636363636363636

Statistical test¶

Hypthesis H0: It does not depend on agreement whether S in v.intr sentences is expressed overtly?

In [63]:

cont_table = [ [agr_overt, agr_possible-agr_overt], [noagr_overt, noagr_possible-noagr_overt] ]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

0.543844035181

We cannot reject H0 as p > 0.05. It does not depent on agreement wether the argument is expressed overtly.

4. Arguments in "v.aff" vs. "v.tr" sentences: ouvert vs. zero¶

Counts¶

In [64]:

v_tree = { "v.aff": collections.defaultdict(int), "v.tr": collections.defaultdict(int) }
for wo in word_orders:
    v = None
    if "v.tr" in wo:
        v = "v.tr"
    if "v.aff" in wo:
        v = "v.aff"
    if v is not None:
        wo2 = tuple([e for e in sorted(wo) if e != "v.aff" and e != "v.tr"])
        v_tree[v][wo2] += word_orders[wo]

for v in ["v.aff", "v.tr"]:
    print(v)
    for e in v_tree[v]:
        print("{0} => {1}".format(e, v_tree[v][e]))

v.aff
('EXP', 'zero-STIM') => 4
('EXP', 'STIM') => 27
('zero-EXP', 'zero-STIM') => 2
('STIM', 'zero-EXP') => 10
v.tr
('P', 'zero-A') => 150
('zero-A', 'zero-P') => 42
('A', 'P') => 137
('A', 'zero-P') => 20

Statistical test for A/EXP¶

Hypothesis 1 (H0): It does not depend on the type of the verb ("v.tr" vs. "v.aff") if the A/EXP is expressed ouvertly.

For the test we use the Fisher exact test, as this test also works for small numbers (http://docs.scipy.org/doc/scipy-0.13.0/reference/generated/scipy.stats.fisher_exact.html).

In [65]:

import scipy.stats
cont_table = [
    [ v_tree["v.aff"][('EXP', 'zero-STIM')] + v_tree["v.aff"][('EXP', 'STIM')],
      v_tree["v.aff"][('zero-EXP', 'zero-STIM')] + v_tree["v.aff"][('STIM', 'zero-EXP')] ],
    [ v_tree["v.tr"][('A', 'P')] + v_tree["v.tr"][('A', 'zero-P')], 
      v_tree["v.tr"][('P', 'zero-A')] + v_tree["v.tr"][('zero-A', 'zero-P')] ]
]
cont_table

Out[65]:

[[31, 12], [157, 192]]

In [66]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[66]:

0.0010331791250171565

We reject the null hypothesis, as the chances of getting a distribution as the observed one are p < 0.05. The verb type affects the ouvertness of the A/EXP argument. In this case the "v.aff" sentences have sigificantly more ouvert arguments EXP then the "v.tr" sentences have ouvert A.

Statistical test for P/STIM¶

Hypothesis 2 (H0): It does not depend on the type of the verb ("v.tr" vs. "v.aff") if the P/STIM is expressed ouvertly.

In [67]:

import scipy.stats
cont_table = [
    [ v_tree["v.aff"][('STIM', 'zero-EXP')] + v_tree["v.aff"][('EXP', 'STIM')],
      v_tree["v.aff"][('zero-EXP', 'zero-STIM')] + v_tree["v.aff"][('EXP', 'zero-STIM')] ],
    [ v_tree["v.tr"][('A', 'P')] + v_tree["v.tr"][('P', 'zero-A')], 
      v_tree["v.tr"][('A', 'zero-P')] + v_tree["v.tr"][('zero-A', 'zero-P')] ]
]
cont_table

Out[67]:

[[37, 6], [287, 62]]

In [68]:

oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
pvalue

Out[68]:

0.67113505746109936

In this case we cannot reject the null hypothesis, as p > 0.05. There is no statistical evidence that the verb type affects the ouvertness of P/STIM.

5. Positions of S, A and P¶

In [69]:

other_verbs = [ 'COP', 'v.tr', 'v.intr', 'v.aff' ]
verb_map = { v: "V" for v in other_verbs }

A_values = []
P_values = []
S_values = []
for wo in helpers.diana.word_orders(ag, annotation_map = verb_map):
    word_order = [w for w in wo.word_order if not w.startswith("zero-")]
    if "V" in word_order:
        v_index = word_order.index("V")
        if "A" in word_order:
            A_values.append(word_order.index("A") - v_index)
        if "P" in word_order:
            P_values.append(word_order.index("P") - v_index)
        if "S" in word_order:
            S_values.append(word_order.index("S") - v_index)

In [70]:

%matplotlib inline
import matplotlib.pyplot as plt

In [71]:

fig, axs = plt.subplots(1, 3, figsize=(14,4))
axs[0].hist(S_values, range(min(S_values), max(S_values)+2))
axs[0].set_title("Positions of S")
axs[1].hist(A_values, range(min(A_values), max(A_values)+2))
axs[1].set_title("Positions of A")
axs[2].hist(P_values, range(min(P_values), max(P_values)+2))
ret = axs[2].set_title("Positions of P")

Box plots of positions¶

In [72]:

plt.figure(figsize=(10,6))
plt.boxplot([S_values, A_values, P_values])
plt.title("Positions of S, A and P")
ret = plt.xticks([1, 2, 3], ["S", "A", "P"])

Plots by clause type¶

In [73]:

A_values = [[], []]
P_values = [[], []]
S_values = [[], []]
clause_types = ["m", "m.rs", "sub", "sub.rs"]
for wo in helpers.diana.word_orders(ag, annotation_map = verb_map):
    word_order = [w for w in wo.word_order if not w.startswith("zero-")]

    if "V" in word_order and wo.clause_type in clause_types:
        ind = 0
        if wo.clause_type == "sub" or wo.clause_type == "sub.rs":
            ind = 1
        v_index = word_order.index("V")
        if "A" in word_order:
            A_values[ind].append(word_order.index("A") - v_index)
        if "P" in word_order:
            P_values[ind].append(word_order.index("P") - v_index)
        if "S" in word_order:
            S_values[ind].append(word_order.index("S") - v_index)

In [73]:

fig, axs = plt.subplots(2, 3, figsize=(14,10))
for ind in [0, 1]:
    type_text = "main"
    if ind == 1:
        type_text = "sub"
    axs[ind][0].hist(S_values[ind], range(min(S_values[ind]), max(S_values[ind])+2))
    axs[ind][0].set_title("Positions of S in {0} clauses".format(type_text))
    axs[ind][1].hist(A_values[ind], range(min(A_values[ind]), max(A_values[ind])+2))
    axs[ind][1].set_title("Positions of A {0} clauses".format(type_text))
    axs[ind][2].hist(P_values[ind], range(min(P_values[ind]), max(P_values[ind])+2))
    ret = axs[ind][2].set_title("Positions of P {0} clauses".format(type_text))

Box plots by clause type¶

In [74]:

fig, axs = plt.subplots(1, 2, figsize=(14,6))
for ind in [0, 1]:
    type_text = "main"
    if ind == 1:
        type_text = "sub"

    axs[ind].boxplot([S_values[ind], A_values[ind], P_values[ind]])
    axs[ind].set_title("Positions of S, A and P in {0} clauses".format(type_text))
    ret = plt.xticks([1, 2, 3], ["S", "A", "P"])

In [ ]: