We show the embedding of nodes annotated as sentences, clauses, phrases, subphrases and words. We write them out in a bracketed tree format that can (eventually) be read by TQUERY, so that Rens Bod and Andreas van Cranenburgh can do interesting things with the resulting trees.
We walk through all anchor positions of the primary data, and follow the node events at each position. LAF-Fabric tries hard to generate node events in an order that respects the actual embedding of nodes.
However, some nodes have gaps, some nodes are linked to regions with zero length, and some nodes have identical regions. Moreover, the way our current LAF resource of the Hebrew Bible is coded causes all higher level nodes such as phrases, clauses and sentences to have gaps at each white space occurrence.
LAF-Fabric succeeds in overcoming all those problems.
In another notebook we take a different approach to the same problem.
%load_ext autoreload
import sys
import collections
from laf.fabric import LafFabric
# Create the LAF-Fabric processor used to load data and run this task.
# NOTE(review): verbose='DETAIL' presumably enables the "DETAIL:" lines seen
# in the load log — confirm against the LAF-Fabric documentation.
processor = LafFabric(verbose='DETAIL')
0.00s This is LAF-Fabric 4.3.3 http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html
%autoreload 2
# Load the data for this task. The load log reports the three positional
# arguments as SOURCE ('etcbc4'), ANNOX ('--') and TASK ('trees').
# The last argument is the load specification:
#   xmlids   - no XML identifier tables for nodes or edges
#   features - a pair of feature lists; only node features are requested here
#              (presumably the second, empty string is the edge-feature list
#              — TODO confirm against the LAF-Fabric API reference)
#   primary  - also load the primary data
API = processor.load(
    'etcbc4',
    '--',
    'trees',
    {
        'xmlids': {'node': False, 'edge': False},
        'features': ('otype monads g_cons_utf8 sp book chapter verse', ''),
        'primary': True,
    },
)
0.00s LOADING API: please wait ... 0.00s DETAIL: COMPILING m: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2014-07-14T16-45-08 0.00s DETAIL: COMPILING a: UP TO DATE 0.01s DETAIL: load main: P.node_anchor 0.81s DETAIL: load main: P.node_anchor_items 1.06s DETAIL: load main: G.node_anchor_min 1.11s DETAIL: load main: G.node_anchor_max 1.17s DETAIL: load main: P.node_events 1.27s DETAIL: load main: P.node_events_items 1.58s DETAIL: load main: P.node_events_k 1.67s DETAIL: load main: P.node_events_n 1.82s DETAIL: load main: G.node_sort 1.87s DETAIL: load main: G.node_sort_inv 2.30s DETAIL: load main: G.edges_from 2.37s DETAIL: load main: G.edges_to 2.44s DETAIL: load main: P.primary_data 2.50s DETAIL: load main: F.etcbc4_db_monads [node] 3.36s DETAIL: load main: F.etcbc4_db_otype [node] 4.03s DETAIL: load main: F.etcbc4_ft_g_cons_utf8 [node] 4.32s DETAIL: load main: F.etcbc4_ft_sp [node] 4.51s DETAIL: load main: F.etcbc4_sft_book [node] 4.53s DETAIL: load main: F.etcbc4_sft_chapter [node] 4.54s DETAIL: load main: F.etcbc4_sft_verse [node] 4.56s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4/trees/__log__trees.txt 4.56s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX -- FOR TASK trees AT 2014-07-15T12-32-03
# Bind the API members this notebook uses to short module-level names:
# feature access (F), the node-event walker (NE), the message logger (msg)
# and the two helpers for files in the task's output directory.
F, NE, msg, outfile, my_file = (
    API[member] for member in ('F', 'NE', 'msg', 'outfile', 'my_file')
)
# Object types we care about, as (otype, abbreviation) pairs.
# The position in this list is significant: it is used as a rank for the
# object type. Everything before the "_split_" marker is rendered as a tree
# node under its abbreviation; everything after it has no abbreviation (None)
# and only provides context (verse, chapter, book).
relevant_nodes = (
    [
        ("word", ''),
        ("subphrase", 'p'),
        ("phrase", 'P'),
        ("clause", 'C'),
        ("sentence", 'S'),
    ]
    + [(context_type, None) for context_type in ("_split_", "verse", "chapter", "book")]
)
# Translation from the value of the `sp` feature of a word to the short
# part-of-speech tag that is written in front of the word in the tree output.
pos_table = dict([
    ('adjv', 'aj'),
    ('advb', 'av'),
    ('art', 'dt'),
    ('conj', 'cj'),
    ('intj', 'ij'),
    ('inrg', 'ir'),
    ('nega', 'ng'),
    ('subs', 'n'),
    ('nmpr', 'n-pr'),
    ('prep', 'pp'),
    ('prps', 'pr-ps'),
    ('prde', 'pr-dem'),
    ('prin', 'pr-int'),
    ('verb', 'vb'),
])
# Lookup tables derived from relevant_nodes.
#   select_node[otype] -> rank of the object type (its index in relevant_nodes)
#   abbrev_node[otype] -> abbreviation to print, or the otype itself when the
#                         table gives no abbreviation (None)
# Both are defaultdicts that answer None for object types that are not listed,
# so an unlisted otype can be recognised by its None rank.
select_node = collections.defaultdict(lambda: None)
abbrev_node = collections.defaultdict(lambda: None)
for (i, (otype, abb)) in enumerate(relevant_nodes):
    select_node[otype] = i
    abbrev_node[otype] = abb if abb is not None else otype

# Rank of the "_split_" marker: types ranked below it are tree nodes,
# types ranked above it are context only (verse, chapter, book).
split_n = select_node['_split_']
# Output handles obtained from the LAF-Fabric API: one for the bracketed
# trees, one for traces of sentences around an anomaly.
trees = outfile("trees.txt")
anomalies = outfile("anomalies.txt")

# Sliding window over the event records of the most recent sentences.
# It holds at most three entries; appending a fourth drops the oldest.
recent_sentences = collections.deque(maxlen=3)
def process_saved():
    """Write a trace of the recently saved sentences to the anomalies file.

    Reads the module-level ``recent_sentences`` window (at most three
    records). Each record is a tuple
    ``(events, book, chapter, verse, verse_label, level)`` where ``events``
    is a list of ``(anchor, node, kind)`` triples and the remaining fields
    are the values that were current when the record was created.

    The records are labelled, in order, as BEFORE the anomaly, the anomaly
    ITSELF and AFTER the anomaly. Every event is written on its own line,
    prefixed by its anchor and indented with one dot per nesting level:

    * kind 0 writes an opening ``(`` (words also get their part of speech,
      text and monads); book/chapter/verse events only update the label;
    * kind 3 writes a closing ``)``;
    * kind 1 writes ``«`` and kind 2 writes ``»`` (presumably the resume and
      suspend events around a gap in a node — confirm against the LAF-Fabric
      node-event documentation).

    For kinds 1, 2 and 3, object types ranked above ``split_n`` (verse,
    chapter, book) are skipped.

    Returns nothing; output goes to ``anomalies`` and, for verse labels,
    also to ``msg``. Does nothing when no sentence has been recorded yet.
    """
    if not recent_sentences:
        # Without any record there is no verse label for the closing line.
        return

    headings = (
        "BEFORE the anomaly {}\n",
        "the anomaly ITSELF {}\n",
        "AFTER the anomaly {}\n",
    )
    for (i, (events, book, chapter, verse, verse_label, level)) in enumerate(recent_sentences):
        if i < len(headings):
            anomalies.write(headings[i].format(verse_label))
        for (anchor, node, kind) in events:
            otype = F.otype.v(node)
            if kind in (1, 2, 3):
                if select_node[otype] > split_n:
                    # Context-only types take no part in the tree trace.
                    continue
                if kind == 3:
                    # Closing marks are written at the level of their opener.
                    level -= 1
                    anomalies.write('{:>7}-{}{})\n'.format(anchor, "." * level, abbrev_node[otype]))
                elif kind == 2:
                    level -= 1
                    anomalies.write('{:>7}-{}{}»\n'.format(anchor, "." * level, abbrev_node[otype]))
                else:
                    anomalies.write('{:>7}-{}«{}\n'.format(anchor, "." * level, abbrev_node[otype]))
                    level += 1
            elif kind == 0:
                if otype == 'book':
                    book = F.book.v(node)
                elif otype == 'chapter':
                    chapter = F.chapter.v(node)
                elif otype == 'verse':
                    verse = F.verse.v(node)
                    verse_label = '{} {}:{}'.format(book, chapter, verse)
                    anomalies.write("\n{}\n".format(verse_label))
                    msg(verse_label)
                elif otype == 'word':
                    pos = pos_table[F.sp.v(node)]
                    text = F.g_cons_utf8.v(node)
                    monads = F.monads.v(node)
                    anomalies.write('{:>7}-{}({} "{}" ={}=\n'.format(anchor, "." * level, pos, text, monads))
                    level += 1
                else:
                    anomalies.write('{:>7}-{}({}\n'.format(anchor, "." * level, abbrev_node[otype]))
                    level += 1
    anomalies.write("\nEND of the anomaly in {}:\n".format(verse_label))
# ---------------------------------------------------------------------------
# Walk through all node events and write one bracketed tree per sentence.
#
# State kept at module level:
#   book, chapter, verse, verse_label - the passage we are currently in
#   tree         - text of the tree built so far for the current sentence
#   n_warnings   - number of times material was found between two sentences
#   level        - current nesting depth (recorded with the saved events)
#   warning      - True while an anomaly has been seen whose trace still has
#                  to be written by process_saved()
#   saved_events - record of the current sentence:
#                  (events, book, chapter, verse, verse_label, level)
# ---------------------------------------------------------------------------
book = None
chapter = None
verse = None
verse_label = None
tree = ''
n_warnings = 0
level = 0
warning = False
# we save the events of the current sentence, in case there is an anomaly.
saved_events = ([], book, chapter, verse, verse_label, level)


def _archive_sentence():
    """Move the current event record into the window and start a fresh one.

    When an anomaly is pending, the window is dumped by process_saved() first
    and the pending flag is cleared.
    """
    global warning, saved_events
    recent_sentences.append(saved_events)
    if warning:
        process_saved()
        warning = False
    saved_events = ([], book, chapter, verse, verse_label, level)


def _flush_tree():
    """Write the finished (or suspended) sentence tree and archive its events."""
    global tree
    trees.write(tree + "\n")
    tree = ""
    _archive_sentence()


def _report_stray_material():
    """Report tree text that was collected outside any sentence.

    The stray text is logged, counted, written to the trees file between
    ``***`` markers and archived; the anomaly is then marked as pending so
    that its trace is written once the next sentence has been archived.
    """
    global tree, n_warnings, warning
    msg("WARNING: material between two sentences in {}: [{}]".format(verse_label, tree))
    n_warnings += 1
    trees.write("{:<15} *** {} ***\n".format(verse_label, tree))
    tree = ''
    _archive_sentence()
    warning = True


# NE yields (anchor, events) pairs; events is a sequence of (node, kind) pairs.
# As handled below: kind 0 opens a node, kind 3 closes it, kind 2 and kind 1
# are written as »…» and «…« (presumably suspend/resume around a gap in a node).
# NOTE(review): `key` ranks nodes by object type; nodes whose type is not in
# relevant_nodes get rank None and are presumably left out by NE — confirm.
# NOTE(review): `simplify` selects the tree-node types (rank below split_n);
# presumably NE suppresses their spurious white-space gaps — confirm.
for (anchor, events) in NE(
    key=lambda n: select_node[F.otype.v(n)],
    simplify=lambda n: select_node[F.otype.v(n)] < split_n,
):
    for (node, kind) in events:
        saved_events[0].append((anchor, node, kind))
        otype = F.otype.v(node)
        if kind == 3 or kind == 2:
            # The level drops for every closing/suspending node, also for the
            # context-only types that are not written to the tree.
            level -= 1
            if select_node[otype] > split_n:
                continue
            if kind == 3:
                tree += ')'
            else:
                tree += '»{}»'.format(abbrev_node[otype])
            if otype == 'sentence':
                _flush_tree()
        elif kind == 1:
            if select_node[otype] > split_n:
                continue
            if otype == 'sentence':
                if tree != '':
                    _report_stray_material()
                tree += '{:<15} «S« '.format(verse_label)
            else:
                tree += '«{}« '.format(abbrev_node[otype])
            # NOTE(review): the source lost its indentation; the increment is
            # taken to apply to every resumed tree node, mirroring kind 2.
            level += 1
        elif kind == 0:
            if otype == 'book':
                book = F.book.v(node)
                msg(book)
            elif otype == 'chapter':
                chapter = F.chapter.v(node)
            elif otype == 'verse':
                verse = F.verse.v(node)
                verse_label = '{} {}:{}'.format(book, chapter, verse)
            elif otype == 'sentence':
                if tree != '':
                    _report_stray_material()
                tree += '{:<15} (S '.format(verse_label)
            elif otype == 'word':
                pos = pos_table[F.sp.v(node)]
                text = F.g_cons_utf8.v(node)
                tree += '({} "{}"'.format(pos, text)
            else:
                tree += '({} '.format(abbrev_node[otype])
            # NOTE(review): taken to apply to every opened node, mirroring the
            # unconditional decrement for kind 3.
            level += 1
msg("There were {} warnings".format(n_warnings))
8m 06s Genesis 8m 09s Exodus 8m 11s Leviticus 8m 13s Numeri 8m 15s Deuteronomium 8m 17s Josua 8m 19s Judices 8m 20s Samuel_I 8m 22s Samuel_II 8m 24s Reges_I 8m 25s Reges_II 8m 27s Jesaia 8m 29s Jeremia 8m 33s Ezechiel 8m 35s Hosea 8m 36s Joel 8m 36s Amos 8m 36s Obadia 8m 36s Jona 8m 36s Micha 8m 36s Nahum 8m 36s Habakuk 8m 36s Zephania 8m 37s Haggai 8m 37s Sacharia 8m 37s Maleachi 8m 37s Psalmi 8m 40s Iob 8m 41s Proverbia 8m 42s Ruth 8m 42s Canticum 8m 42s Ecclesiastes 8m 43s Threni 8m 43s Esther 8m 43s Daniel 8m 44s Esra 8m 45s Nehemia 8m 45s Chronica_I 8m 47s Chronica_II 8m 49s There were 0 warnings
# Finish the task through the API.
# NOTE(review): presumably this closes the files opened with outfile() and
# produces the results-directory listing seen in the log — confirm.
API['close']()
8m 54s Results directory: /Users/dirk/laf-fabric-output/etcbc4/trees __log__trees.txt 867 Tue Jul 15 14:40:57 2014 anomalies.txt 0 Tue Jul 15 14:40:09 2014 trees.txt 8683409 Tue Jul 15 14:40:57 2014
!head -n 25 {my_file('trees.txt')}
Genesis 1:1 (S (C (P (pp "ב")(n "ראשׁית"))(P (vb "ברא"))(P (n "אלהים"))(P (p (pp "את")(dt "ה")(n "שׁמים"))(cj "ו")(p (pp "את")(dt "ה")(n "ארץ"))))) Genesis 1:2 (S (C (P (cj "ו"))(P (dt "ה")(n "ארץ"))(P (vb "היתה"))(P (p (n "תהו"))(cj "ו")(p (n "בהו"))))) Genesis 1:2 (S (C (P (cj "ו"))(P (n "חשׁך"))(P (pp "על")(p (n "פני"))(p (n "תהום"))))) Genesis 1:2 (S (C (P (cj "ו"))(P (p (n "רוח"))(p (n "אלהים")))(P (vb "מרחפת"))(P (pp "על")(p (n "פני"))(p (dt "ה")(n "מים"))))) Genesis 1:3 (S (C (P (cj "ו"))(P (vb "יאמר"))(P (n "אלהים")))) Genesis 1:3 (S (C (P (vb "יהי"))(P (n "אור")))) Genesis 1:3 (S (C (P (cj "ו"))(P (vb "יהי"))(P (n "אור")))) Genesis 1:4 (S (C (P (cj "ו"))(P (vb "ירא"))(P (n "אלהים"))(P (pp "את")(dt "ה")(n "אור")))(C (P (cj "כי"))(P (vb "טוב")))) Genesis 1:4 (S (C (P (cj "ו"))(P (vb "יבדל"))(P (n "אלהים"))(P (p (n "בין")(dt "ה")(n "אור"))(cj "ו")(p (n "בין")(dt "ה")(n "חשׁך"))))) Genesis 1:5 (S (C (P (cj "ו"))(P (vb "יקרא"))(P (n "אלהים"))(P (pp "ל")(dt "")(n "אור"))(P (n "יום")))) Genesis 1:5 (S (C (P (cj "ו"))(P (pp "ל")(dt "")(n "חשׁך"))(P (vb "קרא"))(P (n "לילה")))) Genesis 1:5 (S (C (P (cj "ו"))(P (vb "יהי"))(P (n "ערב")))) Genesis 1:5 (S (C (P (cj "ו"))(P (vb "יהי"))(P (n "בקר")))) Genesis 1:5 (S (C (P (p (n "יום"))(p (n "אחד"))))) Genesis 1:6 (S (C (P (cj "ו"))(P (vb "יאמר"))(P (n "אלהים")))) Genesis 1:6 (S (C (P (vb "יהי"))(P (n "רקיע"))(P (pp "ב")(p (n "תוך"))(p (dt "ה")(n "מים"))))) Genesis 1:6 (S (C (P (cj "ו"))(P (vb "יהי"))(P (vb "מבדיל"))(P (n "בין")(n "מים")(pp "ל")(n "מים")))) Genesis 1:7 (S (C (P (cj "ו"))(P (vb "יעשׂ"))(P (n "אלהים"))(P (pp "את")(dt "ה")(n "רקיע")))) Genesis 1:7 (S (C (P (cj "ו"))(P (vb "יבדל"))(P (n "בין")(dt "ה")(n "מים"))»C»(C (P (cj "אשׁר"))(P (pp "מ")(n "תחת")(pp "ל")(dt "")(n "רקיע")))«C« (P (cj "ו"))(P (n "בין")(dt "ה")(n "מים")))(C (P (cj "אשׁר"))(P (pp "מ")(pp "על")(pp "ל")(dt "")(n "רקיע")))) Genesis 1:7 (S (C (P (cj "ו"))(P (vb "יהי"))(P (av "כן")))) Genesis 1:8 (S (C (P (cj "ו"))(P (vb "יקרא"))(P (n "אלהים"))(P 
(pp "ל")(dt "")(n "רקיע"))(P (n "שׁמים")))) Genesis 1:8 (S (C (P (cj "ו"))(P (vb "יהי"))(P (n "ערב")))) Genesis 1:8 (S (C (P (cj "ו"))(P (vb "יהי"))(P (n "בקר")))) Genesis 1:8 (S (C (P (p (n "יום"))(p (aj "שׁני"))))) Genesis 1:9 (S (C (P (cj "ו"))(P (vb "יאמר"))(P (n "אלהים"))))