IPython Notebook for presentation at Shared Horizons: Data, Biomedicine and the Digital Humanities
@ University of Maryland, 12 April 2013
A. Sean Pue, Tracy K. Teal, C. Titus Brown
Michigan State University
# import some modules
import settings
from meter_graph import MeterGraph
import graph_parser
from graph_parser import GraphParser
from pprint import pprint
import meter_graph
Convert from transliteration to metrical components
ranj se ;xuugar hu))aa insaa;n to mi;t jaataa hai ranj => bcsccbcvbcvcscbcscvbsccvnbcvbcscbcvcvbcvbcscc
b = word break, c = consonant, v = long vowel, s = short vowel, etc.
# Build the parser from the rule file 'settings/urdu-meter.yaml'.
# The rules transcribe text into components of meter,
# e.g. b = break, c = consonant, v = long vowel, s = short vowel.
# Rules can include previous tokens, class of token before previous,
# following tokens, and class of token following those tokens.
pp = GraphParser('settings/urdu-meter.yaml')
# Show the last five loaded rules as a sample.
pprint(pp.rules[-5:])
def transcribe(s):
    """Parse the transliterated string *s* with the module-level parser ``pp``.

    Returns whatever ``pp.parse(s)`` returns.
    """
    parsed = pp.parse(s)
    return parsed
(ParserRule(production='pahun', prev_class='wb', prev_tokens=None, tokens=['p', 'a', 'h', 'u', ';n'], next_tokens=['ch'], next_class='vowel'), ParserRule(production='kyaa', prev_class='wb', prev_tokens=None, tokens=['k', 'y', 'aa'], next_tokens=None, next_class='wb'), ParserRule(production='al', prev_class='wb', prev_tokens=None, tokens=['u', 'l', '-'], next_tokens=None, next_class=None), ParserRule(production='al', prev_class='wb', prev_tokens=None, tokens=['i', 'l', '-'], next_tokens=None, next_class=None), ParserRule(production='tum', prev_class='wb', prev_tokens=None, tokens=['t', 'u', 'm'], next_tokens=['h'], next_class=None))
# Transcribe a sample line and keep the parse result for inspection.
transcription = transcribe(' ranj se ;xuugar hu))aa insaa;n to mi;t jaataa hai ranj')
transcription.output # the transcribed string of metrical components
'bcsccbcvbcvcscbcscvbsccvnbcvbcscbcvcvbcvbcscc'
# The parse result also stores the details of each match for later use;
# show the first five.
transcription.matches[0:5]
[ParserRule(production='b', prev_class=None, prev_tokens=None, tokens=' ', next_tokens=None, next_class=None), ParserRule(production='c', prev_class=None, prev_tokens=None, tokens=['r'], next_tokens=None, next_class=None), ParserRule(production='s', prev_class=None, prev_tokens=None, tokens=['a'], next_tokens=None, next_class=None), ParserRule(production='c', prev_class=None, prev_tokens=None, tokens=['n'], next_tokens=None, next_class=None), ParserRule(production='c', prev_class=None, prev_tokens=None, tokens=['j'], next_tokens=None, next_class=None)]
# The parser uses a graph here; draw it from pp.DG.
# Constraints sit on the final edge (previous token class, previous tokens,
# next tokens, next token class).
graph_parser.draw_parser_graph(pp.DG)
Parses and translates into known meters,
e.g.:
bcsccbcvbcvcscbcscvbsccvnbcvbcscbcvcvbcvbcscc => =-==/=-==/=-==/=-=-
= is long, - is short, / is foot-divider
# Create the meter graph whose graph_scan() method is used to scan input lines.
mg = MeterGraph()
def translate(s):
    """Scan the string *s* with the module-level meter graph ``mg``.

    Returns whatever ``mg.graph_scan(s)`` returns.
    """
    scans = mg.graph_scan(s)
    return scans
# Scan a sample line and report each resulting scan with its meter id.
example=' ranj se ;xuugar hu))aa insaa;n to mi;t jaataa hai ranj'
translations = translate(example)
# NOTE(review): indentation was lost in this export; the print/pprint lines
# below presumably form the loop body -- confirm against the notebook.
for t in translations:
print "scan: ",t.scan, "meter id: ",t.meter_type
print "Also saves matches, e.g.:"
# Show only the first five matches, pretty-printed with a 4-space indent.
pprint(t.matches[0:5], indent=4)
scan: =-===-===-===-=- meter id: G1 Also saves matches, e.g.: [ NodeMatch(node_type='=', matched_tokens=['b', 'c', 's', 'c'], node_id=0, orig_tokens=[' ', ['r'], ['a'], ['n']], found='l_bcsc', token_i=0), NodeMatch(node_type='-', matched_tokens=['c'], node_id=1, orig_tokens=[['j']], found='s_c', token_i=4), NodeMatch(node_type='=', matched_tokens=['b', 'c', 'v'], node_id=11, orig_tokens=[' ', ['s'], ['e']], found='l_bcv', token_i=5), NodeMatch(node_type='=', matched_tokens=['b', 'c', 'v'], node_id=43, orig_tokens=[' ', [';x'], ['uu']], found='l_bcv', token_i=8), NodeMatch(node_type='=', matched_tokens=['c', 's', 'c'], node_id=44, orig_tokens=[['g'], ['a'], ['r']], found='l_csc', token_i=11)]
# The meter scanner also uses a graph; draw it.
# The edges between nodes carry bad matches, e.g. illegal combinations.
mg.draw_graph()
import pydot
import networkx
# Draw the meter graph with networkx, labelling each node with its 'type'
# attribute.
g = mg.DG
labels = {node: attrs['type'] for node, attrs in g.nodes(data=True)}
networkx.drawing.nx_pylab.draw_graphviz(g, labels=labels, node_size=200)
Load ghazal data, and identify meter
# load verse data and define meter helper functions
def load_data():
    """Read 'data/verses.csv' into a dict keyed by verse id.

    The file is parsed as comma-delimited CSV with '|' as the quote
    character.  Every non-blank row must hold exactly three fields:
    verse id, input string and real scan.

    Returns:
        dict mapping each verse id to
        ``{'input_string': ..., 'real_scan': ...}``.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if a non-blank row does not have exactly three fields.
    """
    import csv

    data = {}
    # 'rb' is the mode the Python 2 csv module expects for its input file.
    with open('data/verses.csv', 'rb') as csvfile:
        versereader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in versereader:
            if not row:
                # csv.reader yields an empty list for a blank line; skip it
                # instead of failing on the unpacking below.
                continue
            verse_id, input_string, real_scan = row
            data[verse_id] = {'input_string': input_string,
                              'real_scan': real_scan}
    return data
# Module-level verse table (verse id -> {'input_string', 'real_scan'}) read by
# the helper functions in this notebook.
verses = load_data()
def get_ghazal_keys(id):  # takes a string, e.g. 001
    """Return, in sorted order, the keys of ``verses`` that start with *id*."""
    matching = [key for key in verses if key.startswith(id)]
    matching.sort()
    return matching
def meters_of(s):
    """Return the ``meter_type`` of every result that translate() yields for *s*.

    The list keeps the order in which translate() returns its results.
    """
    found = []
    for result in translate(s):
        found.append(result.meter_type)
    return found
def common_meter(keys):
    """Return the set of meter types shared by every verse named in *keys*.

    Each key is looked up in the module-level ``verses`` table, its
    'input_string' is scanned with translate(), and the ``meter_type``
    values of the results are collected into a set.  The result is the
    intersection of those per-verse sets.

    Args:
        keys: iterable of verse ids present in ``verses``.

    Returns:
        set of meter types common to all the verses; an empty set when
        *keys* is empty.

    Raises:
        KeyError: if a key is not in ``verses``.
    """
    meter_sets = [
        set(t.meter_type for t in translate(verses[key]['input_string']))
        for key in keys
    ]
    if not meter_sets:
        # No verses means no common meter; set.intersection() needs at
        # least one set to work on.
        return set()
    return set.intersection(*meter_sets)
def print_scan(key_id):
for k in get_ghazal_keys(key_id):
verse = verses[k]['input_string']
print k,verse
translations = translate(verse)
for t in translations:
print " * ",t.meter_type, t.scan
# Identify the meter(s) of the sample line.
meters_of(example)
['G1']
# Show the scans and meters of every line of a full ghazal (poem), here '026'.
print_scan('026')
026.01.0 dard minnat-kash-e davaa nah hu))aa * G8 =-==-=-=--= 026.01.1 mai;n nah achchhaa hu))aa buraa nah hu))aa * G8 =-==-=-=--= * G8 --==-=-=--= 026.02.0 jam((a karte ho kyuu;n raqiibo;n ko * G8 =-==-=-=== 026.02.1 ik tamaashaa hu))aa gilaa nah hu))aa * G8 =-==-=-=--= 026.03.0 ham kahaa;n qismat aazmaane jaa))e;n * G8 =-==-=-===- 026.03.1 tuu hii jab ;xanjar-aazmaa nah hu))aa * G8 =-==-=-=--= * G8 --==-=-=--= 026.04.0 kitne shiirii;n hai;n tere lab kih raqiib * G8 =-==-=-=--=- 026.04.1 gaaliyaa;n khaa ke be-mazaa nah hu))aa * G8 =-==-=-=--= 026.05.0 hai ;xabar garm un ke aane kii * G11 =-==--==== * G8 =-==-=-=== * G11 --==--==== * G8 --==-=-=== 026.05.1 aaj hii ghar me;n boriyaa nah hu))aa * G8 =-==-=-=--= 026.06.0 kyaa vuh namruud kii ;xudaa))ii thii * G19 ====-=-==- * G8 =-==-=-=== 026.06.1 bandagii me;n miraa bhalaa nah hu))aa * G8 =-==-=-=--= 026.07.0 jaan dii dii hu))ii usii kii thii * G8 =-==-=-=== 026.07.1 ;haq to yuu;n hai kih ;haq adaa nah hu))aa * G8 =-==-=-=--= 026.08.0 za;xm gar dab gayaa lahuu nah thamaa * G8 =-==-=-=--= 026.08.1 kaam gar ruk gayaa ravaa nah hu))aa * G8 =-==-=-=--= 026.09.0 rahzanii hai kih dil-sitaanii hai * G8 =-==-=-=== 026.09.1 le ke dil dil-sitaa;n ravaanah hu))aa * G8 =-==-=-=--= * G8 --==-=-=--= 026.10.0 kuchh to pa;rhye kih log kahte hai;n * G19 ====-=-==- * G8 =-==-=-=== 026.10.1 aaj ;gaalib ;gazal-saraa nah hu))aa * G8 =-==-=-=--=
# Show the meter(s) common to every line of ghazal '026'.
print 'Common meter(s): ',common_meter(get_ghazal_keys('026'))
Common meter(s): set(['G8'])
def test_all(verbose=False):
for key,verse in sorted(verses.iteritems()):
verse_meters = meters_of(verse['input_string'])
meter_okay= verse['real_scan'] in verse_meters
if verbose==True: print key, " assert ", verse['real_scan'],' in ', verse_meters
assert(meter_okay==True)
# Run the check over every loaded verse, printing each comparison.
test_all(verbose=True)
# check execution time
import cProfile
def quick_run():
    """Compute the meters of every verse in ``verses``, discarding the results.

    Used as the workload when profiling.
    """
    for verse in verses.itervalues():
        meters_of(verse['input_string'])
# Profile one full pass over all verses (call quick_run() directly to run it
# without the profiler).
cProfile.run('quick_run()')