Kostić, Đ. 1999. Frekvencijski rečnik savremenog srpskog jezika (Frequency Dictionary of Contemporary Serbian Language). Institute for Experimental Phonetics and Speech Pathology & Laboratory of Experimental Psychology, University of Belgrade, Serbia.
Baayen, R. H., Milin, P., Filipović Đurđević, D., Hendrix, P. and Marelli, M. 2011. "An amorphous model for morphological processing in visual comprehension based on naive discriminative learning." Psychological Review 118:438-482.
import pandas as pd
import pandas.rpy.common as com
import numpy as np
from sklearn.feature_extraction import DictVectorizer
%load_ext autoreload
%autoreload 2
%load_ext rmagic
%precision 2
u'%.2f'
from ndl import *
%%R
library(ndl)
This is ndl version 0.2.16. For an overview of the package, type 'help("ndl.package")'.
# Fetch the `serbian` data set through the pandas <-> R bridge.
serbian = com.load_data('serbian')

# Cues: the result of orthoCoding on every word form with grams=2
# (the printed head shows boundary-marked letter pairs such as '#y', 'ye', 'a#').
serbian['Cues'] = orthoCoding(serbian.WordForm, grams=2)

# Outcomes: each LemmaCase label cut at the underscores and frozen as a
# tuple, e.g. 'yena_nom_Sg' -> ('yena', 'nom', 'Sg').
serbian['Outcomes'] = [tuple(parts) for parts in serbian.LemmaCase.str.split('_')]

serbian.head()
WordForm | LemmaCase | Frequency | Cues | Outcomes | |
---|---|---|---|---|---|
1 | yena | yena_nom_Sg | 576 | (#y, ye, en, na, a#) | (yena, nom, Sg) |
2 | yene | yena_gen_Sg | 229 | (#y, ye, en, ne, e#) | (yena, gen, Sg) |
3 | yeni | yena_dat_Sg | 55 | (#y, ye, en, ni, i#) | (yena, dat, Sg) |
4 | yenu | yena_acc_Sg | 167 | (#y, ye, en, nu, u#) | (yena, acc, Sg) |
5 | yenom | yena_ins_Sg | 39 | (#y, ye, en, no, om, m#) | (yena, ins, Sg) |
5 rows × 5 columns
# Run ndl() on the prepared frame. Judging by the printed head, the result is
# a table with one row per cue and one column per outcome (278 columns here);
# presumably these are the learned cue-to-outcome weights -- confirm against
# the ndl module.
sw = ndl(serbian)
# Show the first rows of the table.
sw.head()
Pl | Sg | acc | akademija | aparat | bitka | bog | boja | bol | bor | borac | brazda | brdo | brid | briga | brigada | brod | bura | cena | cesta | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
#a | -0.467714 | 0.952442 | 0.326035 | 0.471536 | 0.506614 | -0.001006 | -0.032020 | 0.037598 | -0.028174 | 0.015433 | -0.011195 | 0.006800 | 0.003051 | 0.000134 | 0.015739 | -0.039569 | 0.017378 | -0.002766 | 0.030046 | -0.052284 | ... |
#b | -0.069955 | 0.556007 | 0.095385 | -0.005611 | -0.012985 | 0.079436 | 0.121653 | 0.066181 | 0.254183 | 0.097360 | 0.109091 | 0.027724 | -0.003189 | 0.009077 | 0.036710 | 0.022223 | 0.106195 | 0.000795 | -0.000225 | -0.019009 | ... |
#c | -0.099439 | 0.585776 | 0.110130 | 0.002622 | -0.021386 | 0.010436 | -0.004947 | 0.016928 | -0.016833 | -0.005711 | -0.021122 | -0.009816 | 0.002113 | -0.007284 | 0.023586 | -0.012067 | 0.007555 | 0.000627 | 0.577554 | 0.344233 | ... |
#d | -0.017994 | 0.500962 | 0.077627 | -0.014584 | -0.004215 | -0.002597 | 0.015613 | -0.035485 | 0.024504 | 0.006509 | 0.007348 | -0.012459 | -0.045132 | -0.002656 | 0.027050 | -0.018789 | 0.013864 | 0.000444 | -0.025700 | 0.006292 | ... |
#e | -0.465341 | 0.925997 | -0.093245 | 0.038236 | -0.056175 | 0.024248 | 0.112225 | -0.042413 | -0.031564 | -0.030352 | -0.037280 | -0.028162 | 0.002051 | 0.010243 | 0.078581 | 0.000173 | -0.073940 | 0.000415 | -0.023138 | 0.004618 | ... |
5 rows × 278 columns
# Inflectional outcome labels. Any outcome that is neither a number nor a
# case label is treated as a lemma by the loop below.
num = ['Sg','Pl']
case = ['nom','gen','dat','acc','ins','loc']
infl = num + case

# For every word form, predict the (lemma, case, number) triple made of the
# most strongly activated outcome in each of the three categories.
predict = []
for cue in serbian.Cues:
    A = activation(cue, sw)
    # Order the outcomes from highest to lowest activation. This relies on the
    # legacy in-place sort; presumably A is a pandas Series -- confirm against
    # the ndl module.
    A.sort(ascending=False)
    res = [None, None, None]  # slots: [lemma, case, number]
    for ind in A.index:
        if ind in num:
            slot = 2
        elif ind in case:
            slot = 1
        else:
            slot = 0
        # Fill each slot only once. The labels arrive best-first, so without
        # this guard a lower-ranked label (say 'Pl') would overwrite the
        # top-ranked one ('Sg') while another slot is still empty.
        if res[slot] is None:
            res[slot] = ind
            if None not in res:
                break
    predict.append(tuple(res))
serbian['Predicted'] = predict
serbian
serbian
WordForm | LemmaCase | Frequency | Cues | Outcomes | Predicted | |
---|---|---|---|---|---|---|
1 | yena | yena_nom_Sg | 576 | (#y, ye, en, na, a#) | (yena, nom, Sg) | (yena, nom, Sg) |
2 | yene | yena_gen_Sg | 229 | (#y, ye, en, ne, e#) | (yena, gen, Sg) | (yena, nom, Pl) |
3 | yeni | yena_dat_Sg | 55 | (#y, ye, en, ni, i#) | (yena, dat, Sg) | (yena, nom, Sg) |
4 | yenu | yena_acc_Sg | 167 | (#y, ye, en, nu, u#) | (yena, acc, Sg) | (yena, acc, Sg) |
5 | yenom | yena_ins_Sg | 39 | (#y, ye, en, no, om, m#) | (yena, ins, Sg) | (yena, ins, Sg) |
6 | yeni | yena_loc_Sg | 16 | (#y, ye, en, ni, i#) | (yena, loc, Sg) | (yena, nom, Sg) |
7 | yene | yena_nom_Pl | 415 | (#y, ye, en, ne, e#) | (yena, nom, Pl) | (yena, nom, Pl) |
8 | yena | yena_gen_Pl | 336 | (#y, ye, en, na, a#) | (yena, gen, Pl) | (yena, nom, Sg) |
9 | yenama | yena_dat_Pl | 33 | (#y, ye, en, na, am, ma, a#) | (yena, dat, Pl) | (yena, loc, Pl) |
10 | yene | yena_acc_Pl | 136 | (#y, ye, en, ne, e#) | (yena, acc, Pl) | (yena, nom, Pl) |
11 | yenama | yena_ins_Pl | 24 | (#y, ye, en, na, am, ma, a#) | (yena, ins, Pl) | (yena, loc, Pl) |
12 | yenama | yena_loc_Pl | 4 | (#y, ye, en, na, am, ma, a#) | (yena, loc, Pl) | (yena, loc, Pl) |
13 | yeqa | yeqa_nom_Sg | 179 | (#y, ye, eq, qa, a#) | (yeqa, nom, Sg) | (yeqa, nom, Sg) |
14 | yeqe | yeqa_gen_Sg | 54 | (#y, ye, eq, qe, e#) | (yeqa, gen, Sg) | (yeqa, gen, Sg) |
15 | yeqi | yeqa_dat_Sg | 7 | (#y, ye, eq, qi, i#) | (yeqa, dat, Sg) | (yeqa, loc, Sg) |
16 | yequ | yeqa_acc_Sg | 95 | (#y, ye, eq, qu, u#) | (yeqa, acc, Sg) | (yeqa, acc, Sg) |
17 | yeqom | yeqa_ins_Sg | 30 | (#y, ye, eq, qo, om, m#) | (yeqa, ins, Sg) | (yeqa, ins, Sg) |
18 | yeqi | yeqa_loc_Sg | 43 | (#y, ye, eq, qi, i#) | (yeqa, loc, Sg) | (yeqa, loc, Sg) |
19 | yeqe | yeqa_nom_Pl | 102 | (#y, ye, eq, qe, e#) | (yeqa, nom, Pl) | (yeqa, gen, Sg) |
20 | yeqa | yeqa_gen_Pl | 164 | (#y, ye, eq, qa, a#) | (yeqa, gen, Pl) | (yeqa, nom, Sg) |
21 | yeqama | yeqa_dat_Pl | 3 | (#y, ye, eq, qa, am, ma, a#) | (yeqa, dat, Pl) | (yeqa, loc, Pl) |
22 | yeqe | yeqa_acc_Pl | 84 | (#y, ye, eq, qe, e#) | (yeqa, acc, Pl) | (yeqa, gen, Sg) |
23 | yeqama | yeqa_ins_Pl | 14 | (#y, ye, eq, qa, am, ma, a#) | (yeqa, ins, Pl) | (yeqa, loc, Pl) |
24 | yeqama | yeqa_loc_Pl | 7 | (#y, ye, eq, qa, am, ma, a#) | (yeqa, loc, Pl) | (yeqa, loc, Pl) |
25 | yivot | yivot_nom_Sg | 991 | (#y, yi, iv, vo, ot, t#) | (yivot, nom, Sg) | (yivot, nom, Sg) |
26 | yivota | yivot_gen_Sg | 1004 | (#y, yi, iv, vo, ot, ta, a#) | (yivot, gen, Sg) | (yivot, gen, Sg) |
27 | yivotu | yivot_dat_Sg | 100 | (#y, yi, iv, vo, ot, tu, u#) | (yivot, dat, Sg) | (yivot, loc, Sg) |
28 | yivot | yivot_acc_Sg | 799 | (#y, yi, iv, vo, ot, t#) | (yivot, acc, Sg) | (yivot, nom, Sg) |
29 | yivotom | yivot_ins_Sg | 142 | (#y, yi, iv, vo, ot, to, om, m#) | (yivot, ins, Sg) | (yivot, ins, Sg) |
30 | yivotu | yivot_loc_Sg | 248 | (#y, yi, iv, vo, ot, tu, u#) | (yivot, loc, Sg) | (yivot, loc, Sg) |
31 | yivoti | yivot_nom_Pl | 22 | (#y, yi, iv, vo, ot, ti, i#) | (yivot, nom, Pl) | (yivot, gen, Sg) |
32 | yivota | yivot_gen_Pl | 30 | (#y, yi, iv, vo, ot, ta, a#) | (yivot, gen, Pl) | (yivot, gen, Sg) |
33 | yivotima | yivot_dat_Pl | 3 | (#y, yi, iv, vo, ot, ti, im, ma, a#) | (yivot, dat, Pl) | (yivot, ins, Pl) |
34 | yivote | yivot_acc_Pl | 52 | (#y, yi, iv, vo, ot, te, e#) | (yivot, acc, Pl) | (yivot, gen, Sg) |
35 | yivotima | yivot_ins_Pl | 5 | (#y, yi, iv, vo, ot, ti, im, ma, a#) | (yivot, ins, Pl) | (yivot, ins, Pl) |
36 | yivotima | yivot_loc_Pl | 2 | (#y, yi, iv, vo, ot, ti, im, ma, a#) | (yivot, loc, Pl) | (yivot, ins, Pl) |
37 | {etwa | {etwa_nom_Sg | 33 | (#{, {e, et, tw, wa, a#) | ({etwa, nom, Sg) | ({etwa, gen, Sg) |
38 | {etwe | {etwa_gen_Sg | 10 | (#{, {e, et, tw, we, e#) | ({etwa, gen, Sg) | ({etwa, nom, Sg) |
39 | {etwi | {etwa_dat_Sg | 1 | (#{, {e, et, tw, wi, i#) | ({etwa, dat, Sg) | ({etwa, nom, Pl) |
40 | {etwu | {etwa_acc_Sg | 29 | (#{, {e, et, tw, wu, u#) | ({etwa, acc, Sg) | ({etwa, loc, Sg) |
41 | {etwom | {etwa_ins_Sg | 5 | (#{, {e, et, tw, wo, om, m#) | ({etwa, ins, Sg) | ({etwa, ins, Sg) |
42 | {etwi | {etwa_loc_Sg | 12 | (#{, {e, et, tw, wi, i#) | ({etwa, loc, Sg) | ({etwa, nom, Pl) |
43 | {etwe | {etwa_nom_Pl | 6 | (#{, {e, et, tw, we, e#) | ({etwa, nom, Pl) | ({etwa, nom, Sg) |
44 | {etwi | {etwa_gen_Pl | 5 | (#{, {e, et, tw, wi, i#) | ({etwa, gen, Pl) | ({etwa, nom, Pl) |
45 | {etwama | {etwa_dat_Pl | 1 | (#{, {e, et, tw, wa, am, ma, a#) | ({etwa, dat, Pl) | ({etwa, ins, Pl) |
46 | {etwe | {etwa_acc_Pl | 11 | (#{, {e, et, tw, we, e#) | ({etwa, acc, Pl) | ({etwa, nom, Sg) |
47 | {etwama | {etwa_ins_Pl | 2 | (#{, {e, et, tw, wa, am, ma, a#) | ({etwa, ins, Pl) | ({etwa, ins, Pl) |
48 | {etwama | {etwa_loc_Pl | 2 | (#{, {e, et, tw, wa, am, ma, a#) | ({etwa, loc, Pl) | ({etwa, ins, Pl) |
49 | {irina | {irina_nom_Sg | 16 | (#{, {i, ir, ri, in, na, a#) | ({irina, nom, Sg) | ({irina, gen, Sg) |
50 | {irine | {irina_gen_Sg | 28 | (#{, {i, ir, ri, in, ne, e#) | ({irina, gen, Sg) | ({irina, acc, Pl) |
51 | {irini | {irina_dat_Sg | 3 | (#{, {i, ir, ri, in, ni, i#) | ({irina, dat, Sg) | ({irina, loc, Sg) |
52 | {irinu | {irina_acc_Sg | 17 | (#{, {i, ir, ri, in, nu, u#) | ({irina, acc, Sg) | ({irina, acc, Sg) |
53 | {irinom | {irina_ins_Sg | 20 | (#{, {i, ir, ri, in, no, om, m#) | ({irina, ins, Sg) | ({irina, ins, Sg) |
54 | {irini | {irina_loc_Sg | 17 | (#{, {i, ir, ri, in, ni, i#) | ({irina, loc, Sg) | ({irina, loc, Sg) |
55 | {irine | {irina_nom_Pl | 11 | (#{, {i, ir, ri, in, ne, e#) | ({irina, nom, Pl) | ({irina, acc, Pl) |
56 | {irina | {irina_gen_Pl | 12 | (#{, {i, ir, ri, in, na, a#) | ({irina, gen, Pl) | ({irina, gen, Sg) |
57 | {irinama | {irina_dat_Pl | 2 | (#{, {i, ir, ri, in, na, am, ma, a#) | ({irina, dat, Pl) | ({irina, loc, Pl) |
58 | {irine | {irina_acc_Pl | 23 | (#{, {i, ir, ri, in, ne, e#) | ({irina, acc, Pl) | ({irina, acc, Pl) |
59 | {irinama | {irina_ins_Pl | 2 | (#{, {i, ir, ri, in, na, am, ma, a#) | ({irina, ins, Pl) | ({irina, loc, Pl) |
60 | {irinama | {irina_loc_Pl | 3 | (#{, {i, ir, ri, in, na, am, ma, a#) | ({irina, loc, Pl) | ({irina, loc, Pl) |
... | ... | ... | ... | ... | ... |
3240 rows × 6 columns
# Share of rows whose predicted (lemma, case, number) tuple matches the true
# outcome tuple exactly.
hits = serbian.Outcomes == serbian.Predicted
hits.sum() / float(len(hits))
0.37