#!/usr/bin/env python
# coding: utf-8

# # Proper
# A text visualization in which proper nouns and verbs are marked.
#
# The sentences, clauses and phrases are also marked.
#
# The visualization shows genre transitions to and from genealogies.

# In[2]:

import sys
import collections
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

from laf.fabric import LafFabric
fabric = LafFabric()


# In[3]:

fabric.load('etcbc4', '--', 'proper', {
    "xmlids": {"node": False, "edge": False},
    "features": (''' otype monads maxmonad minmonad gn sp label chapter book ''', ''),
    "primary": False,
})
# NOTE(review): the names F, NN, outfile, infile and close used below are not
# defined anywhere in this file; presumably this exec injects them into the
# module namespace. The executed text comes from the fabric object itself.
exec(fabric.localnames.format(var='fabric'))


# In[4]:

out = outfile("properviz.txt")

# Maps the object types that take part in the visualization to the short
# codes used below; every other object type maps to None and is skipped.
type_map = collections.defaultdict(lambda: None, [
    ("chapter", 'Ch'),
    ("verse", 'V'),
    ("sentence", 'S'),
    ("clause", 'C'),
    ("phrase", 'P'),
    ("word", 'w'),
])
# Order in which objects sharing the same monad span are printed.
otypes = ['Ch', 'V', 'S', 'C', 'P', 'w']

# watch[maxm] holds the codes of the objects that were opened and whose
# closing mark must be written once the word keyed by maxm has been printed.
watch = collections.defaultdict(lambda: {})
# Objects collected so far for the current (minmonad, maxmonad) span.
start = {}
# Slot 0: label of the current verse; slot 1: the same label until it has
# been printed at the start of a sentence line, after which it is blanked.
cur_verse_label = ['', '']


def print_node(ob, obdata):
    """Write the visualization marks for one object to ``out``.

    Parameters:
        ob: short type code of the object, one of the values in ``otypes``.
        obdata: tuple ``(node, minm, maxm, monads)`` as collected by the
            main loop below.

    Effects per type code:
        "w"  -- writes "◘" when ``watch`` is empty; otherwise writes a
                symbol chosen by part of speech ("♠" for verbs; for proper
                nouns "♂", "♀" or "⊙" by gender; "─" in all other cases).
                Then, if ``monads`` is a key of ``watch``, writes the closing
                marks of the objects registered there and removes the entry.
        "Ch" -- writes nothing.
        "V"  -- stores the stripped verse label in ``cur_verse_label``.
        "S"  -- starts a new output line headed by the pending verse label
                and registers the sentence in ``watch[maxm]``.
        "C"  -- writes "├" and registers the clause in ``watch[maxm]``.
        "P"  -- only registers the phrase in ``watch[maxm]``.
        other -- writes "«" + code and registers the object.
    """
    (node, minm, maxm, monads) = obdata
    if ob == "w":
        if not watch:
            # No object is currently open.
            out.write("◘")
        else:
            outchar = "─"
            p_o_s = F.sp.v(node)
            if p_o_s == "nmpr":
                gender = F.gn.v(node)
                if gender == "m":
                    outchar = "♂"
                elif gender == "f":
                    outchar = "♀"
                elif gender == "unknown":
                    outchar = "⊙"
                # Any other gender value keeps the default "─".
            elif p_o_s == "verb":
                outchar = "♠"
            out.write(outchar)
        if monads in watch:
            tofinish = watch[monads]
            # Close in reverse printing order.
            for o in reversed(otypes):
                if o in tofinish:
                    if o == 'C':
                        out.write("┤")
                    elif o == 'P':
                        # A phrase end is only drawn when no clause ends at
                        # the same word.
                        if 'C' not in tofinish:
                            out.write("┼")
                    elif o != 'S':
                        # Sentences get no closing mark.
                        out.write("{}»".format(o))
            del watch[monads]
    elif ob == "Ch":
        # Chapters produce no output; this branch only keeps them out of
        # the generic ``else`` below.
        pass
    elif ob == "V":
        this_verse_label = F.label.v(node).strip(" ")
        cur_verse_label[0] = this_verse_label
        cur_verse_label[1] = this_verse_label
    elif ob == "S":
        out.write("\n{:<11} ".format(cur_verse_label[1]))
        # Print the verse label only in front of the first sentence line.
        cur_verse_label[1] = ''
        watch[maxm][ob] = None
    elif ob == "C":
        out.write("├")
        watch[maxm][ob] = None
    elif ob == "P":
        watch[maxm][ob] = None
    else:
        out.write("«{}".format(ob))
        watch[maxm][ob] = None


lastmin = None
lastmax = None

# Consecutive nodes with the same (minmonad, maxmonad) span are collected in
# ``start``; as soon as the span changes, the collected nodes are printed in
# the fixed order given by ``otypes``.
for i in NN():
    otype = F.otype.v(i)
    if otype == 'book':
        # Progress indication only.
        sys.stderr.write("{:<11}".format(F.book.v(i)))
    ob = type_map[otype]
    if ob is None:
        continue
    monads = F.monads.v(i)
    minm = F.minmonad.v(i)
    maxm = F.maxmonad.v(i)
    if lastmin == minm and lastmax == maxm:
        start[ob] = (i, minm, maxm, monads)
    else:
        for o in otypes:
            if o in start:
                print_node(o, start[o])
        start = {ob: (i, minm, maxm, monads)}
        lastmin = minm
        lastmax = maxm
# Print whatever is still pending for the last span.
for ob in otypes:
    if ob in start:
        print_node(ob, start[ob])
close()


# In[5]:

viz = infile('properviz.txt')
lines = viz.readlines()
print(''.join(lines[5021:5121]))


# Let us plot the proper noun density per chapter.
# First we put the chapters, proper noun frequencies and verb frequencies
# in corresponding lists ``ch``, ``p``, and ``v``.

# In[6]:

book = None
chapter = None
words = 0
proper = 0
verb = 0
ch = []
p = []
v = []
for node in NN(test=F.otype.v, values=['word', 'chapter', 'book']):
    otype = F.otype.v(node)
    if otype == 'book':
        book = F.book.v(node)
    elif otype == 'chapter':
        if chapter is not None:
            # A new chapter starts: store the percentages of the previous one.
            p.append(100 * float(proper) / words if words else 0)
            v.append(100 * float(verb) / words if words else 0)
            ch.append(cur_chapter)
        chapter = F.chapter.v(node)
        cur_chapter = "{} {}".format(book, chapter)
        verb = 0
        proper = 0
        words = 0
    else:
        words += 1
        if F.sp.v(node) == 'nmpr':
            proper += 1
        if F.sp.v(node) == 'verb':
            verb += 1
if chapter is not None:
    # Store the figures of the last chapter, which no later chapter node
    # flushes.
    ch.append(cur_chapter)
    p.append(100 * float(proper) / words if words else 0)
    v.append(100 * float(verb) / words if words else 0)


# Then we plot the ``p`` and ``v`` series against the ``ch`` series.

# In[7]:

x = range(len(ch))
# Window of chapter indices to plot. Note that this rebinds ``start``, which
# held the pending-objects dict in the cell above.
start = 50
end = 70
fig = plt.figure()
plt.plot(x[start:end], p[start:end], 'b-', x[start:end], v[start:end], 'r-')
plt.axis([start, end, 0, 50])
plt.xticks(x[start:end], ch[start:end], rotation='vertical')
plt.margins(0.2)
plt.subplots_adjust(bottom=0.15)
plt.title('proper')


# In[ ]: