# Walk-through of navigating an NLTK constituency tree, using the first parsed
# sentence of the treebank corpus file 'wsj_0001.mrg'.
#
# print is always called with a single parenthesised argument so that the
# script produces the same output on Python 2 and also runs on Python 3.
import nltk
from nltk.corpus import treebank

# First parsed sentence of the file.
t = treebank.parsed_sents('wsj_0001.mrg')[0]
print(t)

# Label of the root node.
print(t.label())

# Children are reached by integer index; chaining indexes descends the tree.
print(t[0])
print(t[1])
print(t[2])
print(t[0][0])
print(t[0][0][1])
print(t[0][0][1][0])

# Every tree position, listed in preorder.
print(t.treepositions(order='preorder'))

# The leaves of the tree.
print(t.leaves())

# A tree with only the root and the leaves directly connected to it
print(t.flatten())

# Generate all subtrees
for subtree in t.subtrees():
    print(subtree)


def filt(x):
    """Return True when the subtree *x* is labelled 'NP'."""
    return x.label() == 'NP'


# Generate only the subtrees accepted by filt (label 'NP')
for subtree in t.subtrees(filter=filt):
    print(subtree)

# Same kind of filtering with an inline predicate: subtrees labelled 'NN'
for subtree in t.subtrees(filter=lambda st: st.label() == 'NN'):
    print(subtree)

# As a bare expression the result of t.pos() is only echoed by an interactive
# session; print it so that the script form shows it as well.
print(t.pos())

# These imports stay at this point of the script, so the statements above run
# even when the project-local pln_inco package or IPython is unavailable.
import pln_inco.syntax_trees
import pln_inco.graphviz as gv
from IPython.display import Image
from IPython.display import display
from IPython.display import display_png

# Render the tree as a PNG image and show it.
# NOTE(review): tree_to_dot / gv.generate are project helpers not visible
# here; presumably they produce a Graphviz DOT description and render it to
# the requested format -- confirm against pln_inco.
tree_dot = pln_inco.syntax_trees.tree_to_dot(t)
tree_png = Image(data=gv.generate(tree_dot, format='png'), width=1000)
display_png(tree_png)

# Note that leaves are numbered starting from 0!
print(t.leaf_treeposition(10))

# Tree position computed for the leaf range 8..14.
print(t.treeposition_spanning_leaves(8, 14))

# The subtree found at tree position (1, 1).
print(t[1][1])