import pandas as pd import pandas.rpy.common as com import numpy as np from sklearn.feature_extraction import DictVectorizer %load_ext autoreload %autoreload 2 %load_ext rmagic %precision 2 from ndl import * %%R library(ndl) serbian = com.load_data('serbian') serbian['Cues'] = orthoCoding(serbian.WordForm,grams=2) serbian['Outcomes'] = [tuple(c.split('_')) for c in serbian.LemmaCase] serbian.head() sw = ndl(serbian) sw.head() num = ['Sg','Pl'] case = ['nom','gen','dat','acc','ins','loc'] infl = num + case predict = [ ] for cue in serbian.Cues: A = activation(cue,sw) A.sort(ascending=False) res = [ None, None, None ] for ind in A.index: if ind in num: res[2] = ind elif ind in case: res[1] = ind else: res[0] = ind if not None in res: break predict.append(tuple(res)) serbian['Predicted'] = predict serbian sum(serbian.Outcomes == serbian.Predicted) / float(len(serbian.index))