# Word-order survey of the Hinuq corpus (notebook-style analysis script).
# IPython notebook magics, kept as comments so the file is valid Python:
# %load_ext autoreload
# %autoreload 2

import collections

import scipy.stats

import helpers.diana

# Tier name -> tier number in the annotation file
# (exact tier semantics are defined by helpers.diana — confirm there).
tier_numbers = {
    "clause_id": 2,
    "clause_type": 3,
    "grammatical_relation": 4,
    "pos_agreement": 5,
    "last_line": 7,
}
ag = helpers.diana.from_excel("data/Hinuq3.csv", tier_numbers=tier_numbers)

# Verb annotations plus overt and zero argument annotations to search for.
verbs = ['COP', 'SAY', 'v.tr', 'v.intr', 'v.aff']
verb_map = {v: "V" for v in verbs}  # collapses every verb label into "V"
others = ['A', 'S', 'P', 'EXP', 'STIM',
          'zero-A', 'zero-S', 'zero-P', 'zero-EXP', 'zero-STIM']
search_terms = verbs + others

# Census of attested word orders; remember clause IDs for rare orders.
word_orders = collections.defaultdict(int)
word_orders_ids = collections.defaultdict(list)
for wo in helpers.diana.word_orders(ag, search_terms):
    word_orders[tuple(wo.word_order)] += 1
    word_orders_ids[tuple(wo.word_order)].append(wo.clause_id)

for word_order, count in word_orders.items():
    print("{0} => {1}".format(word_order, count))
    if count < 5:
        # Rare orders: also list the clause IDs so they can be inspected.
        print(" {1}".format(word_order, word_orders_ids[word_order]))

# Verb-final vs. non-verb-final orders, split by main vs. subordinate clause.
word_orders_main = []
word_orders_main_count = collections.defaultdict(int)
word_orders_sub = []
word_orders_sub_count = collections.defaultdict(int)

main_clause_types = ["m", "m.rs"]
sub_clause_types = ["sub", "sub.rs"]
clause_types = main_clause_types + sub_clause_types

search_terms = verbs + ['A', 'S', 'P', 'EXP', 'STIM']
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    # Only clauses that contain a verb plus at least one other element.
    if "V" in wo.word_order and wo.clause_type in clause_types and len(wo.word_order) > 1:
        if wo.clause_type in sub_clause_types:
            word_orders_sub.append(wo.word_order)
            word_orders_sub_count[tuple(wo.word_order)] += 1
        else:
            word_orders_main.append(wo.word_order)
            word_orders_main_count[tuple(wo.word_order)] += 1

main_v_fin = 0
main_v_nonfin = 0
sub_v_fin = 0
sub_v_nonfin = 0
for wo, c in word_orders_main_count.items():
    if wo[-1] == "V":
        main_v_fin += c
    else:
        main_v_nonfin += c
for wo, c in word_orders_sub_count.items():
    if wo[-1] == "V":
        sub_v_fin += c
    else:
        sub_v_nonfin += c

# 2x2 table: verb-final vs. not, in main vs. subordinate clauses.
cont_table = [[main_v_fin, main_v_nonfin], [sub_v_fin, sub_v_nonfin]]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

print("Counts for main clauses:")
for wo, c in word_orders_main_count.items():
    print("{0} => {1}".format(wo, c))
print("\nCounts for sub clauses:")
for wo, c in word_orders_sub_count.items():
    print("{0} => {1}".format(wo, c))

# Position of particles relative to the verb. pos_counts[i] is
# [count after verb, count before verb] for particles[i].
particles = ['G', 'BEN', 'TIME', 'LOC', 'ADD']
pos_counts = [[0, 0] for _ in particles]
search_terms = verbs + particles
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1
for i, p in enumerate(particles):
    print(p)
    print(" Count after verb: {0}".format(pos_counts[i][0]))
    print(" Count before verb: {0}".format(pos_counts[i][1]))

# Same count, restricted to main clauses and to BEN/G/ADD.
particles = ['BEN', 'G', 'ADD']
pos_counts = [[0, 0] for _ in particles]
search_terms = verbs + particles
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if wo.clause_type in main_clause_types and "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1
for i, p in enumerate(particles):
    print(p)
    print(" Count after verb: {0}".format(pos_counts[i][0]))
    print(" Count before verb: {0}".format(pos_counts[i][1]))

# Per-particle binomial tests plus a pooled test over BEN+G+ADD.
part_sum = [0, 0]
for i, p in enumerate(particles):
    part_sum[0] += pos_counts[i][0]
    part_sum[1] += pos_counts[i][1]
    print("Test for '{0}'".format(p))
    # NOTE(review): scipy.stats.binom_test was removed in SciPy 1.12;
    # on current SciPy use scipy.stats.binomtest(k, n).pvalue instead.
    print(scipy.stats.binom_test(pos_counts[i]))
print("Test for 'BEN+G+ADD'")
print(scipy.stats.binom_test(part_sum))

# All-clause counts for a larger particle set, then Fisher tests of
# BEN+G+ADD against A, P, LOC, A+P, and A+P+LOC.
particles = ['BEN', 'G', 'ADD', 'A', 'P', 'LOC']
pos_counts = [[0, 0] for _ in particles]
search_terms = verbs + particles
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1

BEN_G_ADD = [pos_counts[0][0] + pos_counts[1][0] + pos_counts[2][0],
             pos_counts[0][1] + pos_counts[1][1] + pos_counts[2][1]]

for idx in (3, 4, 5):  # BEN+G+ADD vs. A, then P, then LOC
    cont_table = [BEN_G_ADD, pos_counts[idx]]
    print(cont_table)
    oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
    print(pvalue)

A_P = [pos_counts[3][0] + pos_counts[4][0],
       pos_counts[3][1] + pos_counts[4][1]]
cont_table = [BEN_G_ADD, A_P]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

A_P_LOC = [pos_counts[3][0] + pos_counts[4][0] + pos_counts[5][0],
           pos_counts[3][1] + pos_counts[4][1] + pos_counts[5][1]]
cont_table = [BEN_G_ADD, A_P_LOC]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Main-clause counts including S, then more pairwise Fisher tests.
particles = ['BEN', 'G', 'ADD', 'A', 'P', 'LOC', 'S']
pos_counts = [[0, 0] for _ in particles]
search_terms = verbs + particles
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    for i, p in enumerate(particles):
        if wo.clause_type in main_clause_types and "V" in wo.word_order and p in wo.word_order:
            if wo.word_order.index("V") < wo.word_order.index(p):
                pos_counts[i][0] += 1
            else:
                pos_counts[i][1] += 1

for row in (pos_counts[0], pos_counts[2]):  # BEN vs. P, then ADD vs. P
    cont_table = [row, pos_counts[4]]
    print(cont_table)
    oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
    print(pvalue)

BEN_ADD = [pos_counts[0][0] + pos_counts[2][0],
           pos_counts[0][1] + pos_counts[2][1]]
cont_table = [BEN_ADD, pos_counts[4]]  # BEN+ADD vs. P
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

cont_table = [pos_counts[3], pos_counts[4]]  # A vs. P
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

cont_table = [pos_counts[3], pos_counts[5]]  # A vs. LOC
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Overt vs. zero A with 'SAY' compared against all other verbs.
other_verbs = ['COP', 'v.tr', 'v.intr', 'v.aff']
search_terms = other_verbs + ['SAY', 'A', 'zero-A']
verb_map = {v: "V" for v in other_verbs}  # 'SAY' deliberately stays distinct
SAY_counts = [0, 0]      # [clauses with overt A, clauses with zero A]
others_counts = [0, 0]
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    if 'SAY' in wo.word_order:
        if 'A' in wo.word_order:
            SAY_counts[0] += 1
        elif 'zero-A' in wo.word_order:
            SAY_counts[1] += 1
    if 'V' in wo.word_order:
        if 'A' in wo.word_order:
            others_counts[0] += 1
        elif 'zero-A' in wo.word_order:
            others_counts[1] += 1
cont_table = [SAY_counts, others_counts]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Position of A relative to SAY vs. relative to the other verbs.
search_terms = other_verbs + ['SAY', 'A']
SAY_counts = [0, 0]      # [A after verb, A before verb]
others_counts = [0, 0]
for wo in helpers.diana.word_orders(ag, search_terms, verb_map):
    if 'SAY' in wo.word_order and 'A' in wo.word_order:
        if wo.word_order.index("SAY") < wo.word_order.index("A"):
            SAY_counts[0] += 1
        else:
            SAY_counts[1] += 1
    if "V" in wo.word_order and "A" in wo.word_order:
        if wo.word_order.index("V") < wo.word_order.index("A"):
            others_counts[0] += 1
        else:
            others_counts[1] += 1
cont_table = [SAY_counts, others_counts]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)
print("A after SAY: {0}".format(SAY_counts[0]))
print("A before SAY: {0}".format(SAY_counts[1]))

# Agreement vs. no agreement, split by clause type.
# Index 0 = subordinate clauses, index 1 = all other clause types.
verbs = ['v.tr', 'v.intr', 'v.aff']
agreements = [0, 0]
noagreements = [0, 0]
for wo in helpers.diana.word_orders(ag, verbs, with_agreement=True):
    for agr in wo.agreement:
        if wo.clause_type in sub_clause_types:
            if agr == "noagr":
                noagreements[0] += 1
            else:
                agreements[0] += 1
        else:
            if agr == "noagr":
                noagreements[1] += 1
            else:
                agreements[1] += 1
print("I found {0} verbs with and {1} verbs without agreement.".format(
    agreements[0] + agreements[1], noagreements[0] + noagreements[1]))
print("In main clauses: I found {0} verbs with and {1} verbs without agreement.".format(
    agreements[1], noagreements[1]))
print("In sub clauses: I found {0} verbs with and {1} verbs without agreement.".format(
    agreements[0], noagreements[0]))

# Do zero arguments co-occur with agreeing verbs?  Row index 0 = verb
# agrees, 1 = verb does not agree; column 0 = zero arg, 1 = overt arg.
search_terms = verbs + ['S', 'P', 'STIM', 'zero-S', 'zero-P', 'zero-STIM']
cont_table_S = [[0, 0], [0, 0]]
cont_table_P = [[0, 0], [0, 0]]
cont_table_STIM = [[0, 0], [0, 0]]
cont_table_S_P_STIM = [[0, 0], [0, 0]]
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement=True):
    agreeing = 0
    if len(wo.word_order) != len(wo.agreement):
        continue  # skip clauses whose annotation tiers are misaligned
    for i, w in enumerate(wo.word_order):
        if w in verbs:
            if wo.agreement[i] == "noagr":
                agreeing = 1
    if "zero-S" in wo.word_order:
        cont_table_S[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "S" in wo.word_order:
        cont_table_S[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1
    if "zero-P" in wo.word_order:
        cont_table_P[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "P" in wo.word_order:
        cont_table_P[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1
    if "zero-STIM" in wo.word_order:
        cont_table_STIM[agreeing][0] += 1
        cont_table_S_P_STIM[agreeing][0] += 1
    elif "STIM" in wo.word_order:
        cont_table_STIM[agreeing][1] += 1
        cont_table_S_P_STIM[agreeing][1] += 1

for table in (cont_table_S, cont_table_P, cont_table_STIM, cont_table_S_P_STIM):
    print(table)
    oddsratio, pvalue = scipy.stats.fisher_exact(table)
    print(pvalue)

# Overall frequency of each agreement marker value.
verbs = ['v.tr', 'v.intr', 'v.aff']
agreement_sum = collections.defaultdict(int)
for wo in helpers.diana.word_orders(ag, verbs, with_agreement=True):
    for agr in wo.agreement:
        agreement_sum[agr] += 1
for agr, count in agreement_sum.items():
    print("{} => {}".format(agr, count))

verbs = ['v.tr', 'v.intr', 'v.aff']
# Match each verb's class-agreement marker against the class of its overt
# noun, counting marker-class combinations for clauses where they agree.
search_terms = verbs + ['S', 'P', 'STIM', 'zero-S', 'zero-P', 'zero-STIM']
agreements = collections.defaultdict(int)
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement=True):
    v_class = None
    n_class = None
    v_marker = None
    agreement = False
    zero = False
    if len(wo.word_order) != len(wo.agreement):
        print("length on blue and yellow line different in ID {}".format(wo.clause_id))
        continue
    for i, w in enumerate(wo.word_order):
        if w in verbs:
            # Verb tier: expected shape "marker-class" (e.g. "o-1") or "noagr".
            if wo.agreement[i] != "noagr":
                agreement = True
                if "-" in wo.agreement[i]:
                    v_marker, v_class = wo.agreement[i].split("-")
                else:
                    print("no dash in v agr in ID {}".format(wo.clause_id))
        else:
            # Noun tier: expected shape "prefix-class" with an optional
            # ".subtype" suffix on the class part.
            if "-" in wo.agreement[i]:
                n_split = wo.agreement[i].split("-")
                if len(n_split) > 2:
                    print("more than one dash in n agr in ID {}".format(wo.clause_id))
                # Keep only the class, dropping any ".subtype" suffix.
                # (Using [0] instead of tuple unpacking so a tag with two
                # dots does not crash the loop.)
                n_class = n_split[1]
                if "." in n_class:
                    n_class = n_class.split(".")[0]
            else:
                print("no dash in n agr in ID {}".format(wo.clause_id))
            if w.startswith("zero-"):
                zero = True
    if v_class != n_class and agreement:
        print("n class does not equal v class in ID {} (n_class: {} vs. v_class: {})".format(
            wo.clause_id, n_class, v_class))
    elif v_class is not None and n_class is not None and not zero:
        agreements["{}-{}".format(v_marker, v_class)] += 1
for agr, count in agreements.items():
    print("{} => {}".format(agr, count))

# Fisher tests: overt-noun agreement for marker "o-1" against other marker
# groups.  Each row is [overt-agreeing count, remaining occurrences], with
# totals taken from agreement_sum (computed earlier).
o1_agree = agreements["o-1"]
o1_rest = agreement_sum["o-1"] - o1_agree
for group in (["b-3", "b-hpl"],
              ["r-5", "r-hpl", "r-nhpl"],
              ["y-2", "y-4"]):
    group_agree = sum(agreements[m] for m in group)
    group_total = sum(agreement_sum[m] for m in group)
    cont_table = [[o1_agree, o1_rest],
                  [group_agree, group_total - group_agree]]
    print(cont_table)
    oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
    print(pvalue)

# Rate of overt S with copula clauses.
search_terms = ["COP", "S"]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "COP" in wo.word_order:
        possible += 1
        if "S" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt) / possible))

# Rate of overt S with intransitive verbs.
search_terms = ["v.intr", "S"]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.intr" in wo.word_order:
        possible += 1
        if "S" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt) / possible))

# Rate of overt A and P with transitive verbs; each transitive clause
# licenses two argument slots, hence possible += 2.
search_terms = ["v.tr", "A", "P"]
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.tr" in wo.word_order:
        possible += 2
        if "A" in wo.word_order:
            overt += 1
        if "P" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt) / possible))

search_terms = ["v.aff", "EXP", "STIM"]
# Rate of overt EXP and STIM with affective verbs (two slots per clause).
# Uses the search_terms = ["v.aff", "EXP", "STIM"] set up just above.
possible = 0
overt = 0
for wo in helpers.diana.word_orders(ag, search_terms):
    if "v.aff" in wo.word_order:
        possible += 2
        if "EXP" in wo.word_order:
            overt += 1
        if "STIM" in wo.word_order:
            overt += 1
print("{} / {} = {}".format(overt, possible, float(overt) / possible))

# Overt-S rate split by whether the intransitive verb shows agreement.
search_terms = ["v.intr", "S"]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement=True):
    if len(wo.word_order) != len(wo.agreement):
        continue  # skip misaligned annotation tiers
    if "v.intr" in wo.word_order:
        v_index = wo.word_order.index("v.intr")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 1
            if "S" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 1
            if "S" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(
    agr_overt, agr_possible, float(agr_overt) / agr_possible))
print("without agreement: {} / {} = {}".format(
    noagr_overt, noagr_possible, float(noagr_overt) / noagr_possible))
cont_table = [[agr_overt, agr_possible - agr_overt],
              [noagr_overt, noagr_possible - noagr_overt]]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Same split for transitive verbs (A and P slots, two per clause).
search_terms = ["v.tr", "A", "P"]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement=True):
    if len(wo.word_order) != len(wo.agreement):
        continue
    if "v.tr" in wo.word_order:
        v_index = wo.word_order.index("v.tr")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 2
            if "A" in wo.word_order:
                noagr_overt += 1
            if "P" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 2
            if "A" in wo.word_order:
                agr_overt += 1
            if "P" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(
    agr_overt, agr_possible, float(agr_overt) / agr_possible))
print("without agreement: {} / {} = {}".format(
    noagr_overt, noagr_possible, float(noagr_overt) / noagr_possible))
cont_table = [[agr_overt, agr_possible - agr_overt],
              [noagr_overt, noagr_possible - noagr_overt]]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Same split for affective verbs (EXP and STIM slots).
search_terms = ["v.aff", "EXP", "STIM"]
agr_possible = 0
agr_overt = 0
noagr_possible = 0
noagr_overt = 0
for wo in helpers.diana.word_orders(ag, search_terms, with_agreement=True):
    if len(wo.word_order) != len(wo.agreement):
        continue
    if "v.aff" in wo.word_order:
        v_index = wo.word_order.index("v.aff")
        if wo.agreement[v_index] == "noagr":
            noagr_possible += 2
            if "EXP" in wo.word_order:
                noagr_overt += 1
            if "STIM" in wo.word_order:
                noagr_overt += 1
        else:
            agr_possible += 2
            if "EXP" in wo.word_order:
                agr_overt += 1
            if "STIM" in wo.word_order:
                agr_overt += 1
print("with agreement: {} / {} = {}".format(
    agr_overt, agr_possible, float(agr_overt) / agr_possible))
print("without agreement: {} / {} = {}".format(
    noagr_overt, noagr_possible, float(noagr_overt) / noagr_possible))
cont_table = [[agr_overt, agr_possible - agr_overt],
              [noagr_overt, noagr_possible - noagr_overt]]
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Group the word orders from the first census by verb type, keying on the
# sorted argument pattern with the verb label removed.
v_tree = {"v.aff": collections.defaultdict(int),
          "v.tr": collections.defaultdict(int)}
for wo in word_orders:
    v = None
    if "v.tr" in wo:
        v = "v.tr"
    if "v.aff" in wo:
        v = "v.aff"
    if v is not None:
        wo2 = tuple(e for e in sorted(wo) if e != "v.aff" and e != "v.tr")
        v_tree[v][wo2] += word_orders[wo]
for v in ["v.aff", "v.tr"]:
    print(v)
    for e in v_tree[v]:
        print("{0} => {1}".format(e, v_tree[v][e]))

# Overt vs. zero EXP (for v.aff) against overt vs. zero A (for v.tr).
cont_table = [
    [v_tree["v.aff"][('EXP', 'zero-STIM')] + v_tree["v.aff"][('EXP', 'STIM')],
     v_tree["v.aff"][('zero-EXP', 'zero-STIM')] + v_tree["v.aff"][('STIM', 'zero-EXP')]],
    [v_tree["v.tr"][('A', 'P')] + v_tree["v.tr"][('A', 'zero-P')],
     v_tree["v.tr"][('P', 'zero-A')] + v_tree["v.tr"][('zero-A', 'zero-P')]],
]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Overt vs. zero STIM (for v.aff) against overt vs. zero P (for v.tr).
cont_table = [
    [v_tree["v.aff"][('STIM', 'zero-EXP')] + v_tree["v.aff"][('EXP', 'STIM')],
     v_tree["v.aff"][('zero-EXP', 'zero-STIM')] + v_tree["v.aff"][('EXP', 'zero-STIM')]],
    [v_tree["v.tr"][('A', 'P')] + v_tree["v.tr"][('P', 'zero-A')],
     v_tree["v.tr"][('A', 'zero-P')] + v_tree["v.tr"][('zero-A', 'zero-P')]],
]
print(cont_table)
oddsratio, pvalue = scipy.stats.fisher_exact(cont_table)
print(pvalue)

# Positions of S, A and P relative to the verb (zero arguments dropped;
# negative = before the verb, positive = after).
other_verbs = ['COP', 'v.tr', 'v.intr', 'v.aff']
verb_map = {v: "V" for v in other_verbs}
A_values = []
P_values = []
S_values = []
for wo in helpers.diana.word_orders(ag, annotation_map=verb_map):
    word_order = [w for w in wo.word_order if not w.startswith("zero-")]
    if "V" in word_order:
        v_index = word_order.index("V")
        if "A" in word_order:
            A_values.append(word_order.index("A") - v_index)
        if "P" in word_order:
            P_values.append(word_order.index("P") - v_index)
        if "S" in word_order:
            S_values.append(word_order.index("S") - v_index)

# %matplotlib inline  (IPython magic; no effect in a plain script)
import matplotlib.pyplot as plt

fig, axs = plt.subplots(1, 3, figsize=(14, 4))
axs[0].hist(S_values, range(min(S_values), max(S_values) + 2))
axs[0].set_title("Positions of S")
axs[1].hist(A_values, range(min(A_values), max(A_values) + 2))
axs[1].set_title("Positions of A")
axs[2].hist(P_values, range(min(P_values), max(P_values) + 2))
ret = axs[2].set_title("Positions of P")

plt.figure(figsize=(10, 6))
plt.boxplot([S_values, A_values, P_values])
plt.title("Positions of S, A and P")
ret = plt.xticks([1, 2, 3], ["S", "A", "P"])

# Same position data, split into main (index 0) vs. subordinate (index 1).
A_values = [[], []]
P_values = [[], []]
S_values = [[], []]
clause_types = ["m", "m.rs", "sub", "sub.rs"]
for wo in helpers.diana.word_orders(ag, annotation_map=verb_map):
    word_order = [w for w in wo.word_order if not w.startswith("zero-")]
    if "V" in word_order and wo.clause_type in clause_types:
        ind = 0
        if wo.clause_type == "sub" or wo.clause_type == "sub.rs":
            ind = 1
        v_index = word_order.index("V")
        if "A" in word_order:
            A_values[ind].append(word_order.index("A") - v_index)
        if "P" in word_order:
            P_values[ind].append(word_order.index("P") - v_index)
        if "S" in word_order:
            S_values[ind].append(word_order.index("S") - v_index)

fig, axs = plt.subplots(2, 3, figsize=(14, 10))
for ind in [0, 1]:
    type_text = "main"
    if ind == 1:
        type_text = "sub"
    axs[ind][0].hist(S_values[ind], range(min(S_values[ind]), max(S_values[ind]) + 2))
    axs[ind][0].set_title("Positions of S in {0} clauses".format(type_text))
    axs[ind][1].hist(A_values[ind], range(min(A_values[ind]), max(A_values[ind]) + 2))
    axs[ind][1].set_title("Positions of A in {0} clauses".format(type_text))
    axs[ind][2].hist(P_values[ind], range(min(P_values[ind]), max(P_values[ind]) + 2))
    ret = axs[ind][2].set_title("Positions of P in {0} clauses".format(type_text))

fig, axs = plt.subplots(1, 2, figsize=(14, 6))
for ind in [0, 1]:
    type_text = "main"
    if ind == 1:
        type_text = "sub"
    axs[ind].boxplot([S_values[ind], A_values[ind], P_values[ind]])
    axs[ind].set_title("Positions of S, A and P in {0} clauses".format(type_text))
    # Inside the loop so BOTH subplots get labelled ticks (plt.xticks only
    # acts on the currently active axes).
    ret = plt.xticks([1, 2, 3], ["S", "A", "P"])