# import some modules import settings from meter_graph import MeterGraph import graph_parser from graph_parser import GraphParser from pprint import pprint import meter_graph # loads rules for transcribing text into components of meter # e.g. b = break, c = consonant, v = long vowel, s = short vowel # rules can include previous tokens, class of token before previous, following tokens, class of token following tokens pp = GraphParser('settings/urdu-meter.yaml') pprint(pp.rules[-5:]) def transcribe(s): return pp.parse(s) transcription = transcribe(' ranj se ;xuugar hu))aa insaa;n to mi;t jaataa hai ranj') transcription.output # output of transcript # also stores details of matches for later transcription.matches[0:5] # Using a graph here # Constraints on final edge (previous token class, previous tokens, next tokens, next token class) graph_parser.draw_parser_graph(pp.DG) mg = MeterGraph() def translate(s): return mg.graph_scan(s) example=' ranj se ;xuugar hu))aa insaa;n to mi;t jaataa hai ranj' translations = translate(example) for t in translations: print "scan: ",t.scan, "meter id: ",t.meter_type print "Also saves matches, e.g.:" pprint(t.matches[0:5], indent=4) # Using a graph here # On the edges between nodes are bad matches, e.g. illegal combos mg.draw_graph() import pydot import networkx g=mg.DG labels=dict((n,d['type']) for n,d in g.nodes(data=True)) networkx.drawing.nx_pylab.draw_graphviz(g, labels=labels, node_size=200) # load meter functions def load_data(): data = {} import csv verses = {} multiple_matches = [] with open('data/verses.csv', 'rb') as csvfile: versereader = csv.reader(csvfile, delimiter=',', quotechar='|') for row in versereader: (verse_id, input_string, real_scan) = row data[verse_id] = {'input_string': input_string, 'real_scan': real_scan} return data verses = load_data() def get_ghazal_keys(id): # takes a string, e.g. 001 return [k for k in sorted(verses.keys()) if k.startswith(id)] def meters_of(s): trans = translate(s)#verses[key]['input_string']) return [t.meter_type for t in trans] def common_meter(keys): meters = {} for key in keys: trans = translate(verses[key]['input_string']) meters[key] = [] for t in trans: meters[key].append(t.meter_type) ms=[v for k,v in meters.items()] return reduce(set.intersection,map(set,ms)) def print_scan(key_id): for k in get_ghazal_keys(key_id): verse = verses[k]['input_string'] print k,verse translations = translate(verse) for t in translations: print " * ",t.meter_type, t.scan #identify meters of line meters_of(example) #show meters of full ghazal (poem) print_scan('026') #show common meter print 'Common meter(s): ',common_meter(get_ghazal_keys('026')) def test_all(verbose=False): for key,verse in sorted(verses.iteritems()): verse_meters = meters_of(verse['input_string']) meter_okay= verse['real_scan'] in verse_meters if verbose==True: print key, " assert ", verse['real_scan'],' in ', verse_meters assert(meter_okay==True) test_all(verbose=True) # check execution time import cProfile def quick_run(): for key,verse in verses.iteritems(): # print verse['input_string'] meters_of(verse['input_string']) #quick_run() cProfile.run('quick_run()')