import networkx as nx
from parser import Parser
p = Parser('settings/short.yaml')
print p.rules
# each rule consists of tokens, a production, and possibly a prev_class, prev_tokens, next_tokens, and next_class
# (prev_class) ([prev_tokens]) tokens ([next_tokens]) (next_class) -- see the annotated example after the dump below
print p.tokens
[{'tokens': ['b', 'c', 'v', 'n'], 'production': 's_bcvn<b>', 'next_class': 'b'}, {'tokens': ['b', 'c', 's', 'c<h+wb>'], 'production': 's_bcsc<h+wb>'}, {'tokens': ['b', 'c', 's', 'n'], 'production': 's_bcsn'}, {'tokens': ['b', 'c', 'v'], 'production': 's_bcv<z>', 'next_class': 'z'}, {'tokens': ['b', 'c', 'v'], 'production': 's_bcv<b>', 'next_class': 'b'}, {'tokens': ['b', 'c', 'v'], 'production': 's_bcv<o>', 'next_class': 'o'}, {'tokens': ['c', 'v', 'n'], 'production': 's_cvn<b>', 'next_class': 'b'}, {'tokens': ['b', 'c', 'v_duii'], 'production': 's_bcv_duii'}, {'tokens': ['b', 'c', 'v<aa+z>'], 'production': 's_bcv<aa+z>'}, {'tokens': ['c', 'b', 'o'], 'production': 's_cbo'}, {'tokens': ['c', 'b', 's'], 'production': 's_cbs'}, {'tokens': ['c', 's', 'c<h+wb>'], 'production': 's_csc<h+wb>'}, {'tokens': ['c', 's', 'n'], 'production': 's_csn'}, {'tokens': ['b', 'c', 's'], 'production': 's_bcs'}, {'tokens': ['b', 'c', 'v<ii+z>'], 'production': 's_bcv<ii+z>'}, {'tokens': ['b', 'c', 'v<ii+o>'], 'production': 's_bcv<ii+o>'}, {'tokens': ['b', 's', 'n'], 'production': 's_bsn'}, {'tokens': ['c', 'v_e_fut', 'n'], 'production': 's_cv_e_futn'}, {'tokens': ['b', 'c', 'v_o_fut'], 'production': 's_bcv_o_fut'}, {'tokens': ['b', 'c<;xv>', 's'], 'production': 's_bc<;xv>s'}, {'prev_class': 'b', 'tokens': ['c', 'c'], 'prev_tokens': ['c', 'v'], 'next_class': 'b', 'production': 's_(<b>cv)cc<b>'}, {'prev_class': 'v', 'tokens': ['c', 'c'], 'production': 's_<v>cc<b>', 'next_class': 'b'}, {'prev_class': 'c', 'tokens': ['c', 'c'], 'production': 's_<c>cc<b>', 'next_class': 'b'}, {'prev_class': 'b', 'tokens': ['c', 'c'], 'production': 's_<b>cc<b>', 'next_class': 'b'}, {'tokens': ['c', 'v'], 'production': 's_cv<z>', 'next_class': 'z'}, {'tokens': ['c', 'v'], 'production': 's_cv<b>', 'next_class': 'b'}, {'prev_class': 'b', 'tokens': ['c', 's'], 'production': 's_<b>cs(bal)', 'next_tokens': ['b', 'al']}, {'tokens': ['b', 'c'], 'production': 's_bc<b>', 'next_class': 'b'}, {'tokens': ['c', 'v_e_fut'], 'production': 's_cv_e_fut'}, {'tokens': ['v<ii+z>', 'z'], 'production': 's_v<ii+z>z'}, {'tokens': ['c', 'v_o_fut'], 'production': 's_cv_o_fut'}, {'tokens': ['b', 's'], 'production': 's_bs'}, {'tokens': ['c', 'v<ii+z>'], 'production': 's_cv<ii+z>'}, {'tokens': ['c', 'v<ii+o>'], 'production': 's_cv<ii+o>'}, {'tokens': ['b', 'ko_ii'], 'production': 's_bko_ii'}, {'tokens': ['c', 'v<aa+z>'], 'production': 's_cv<aa+z>'}, {'tokens': ['c<h+wb>', 'o'], 'production': 's_c<h+wb>o'}, {'tokens': ['b', 'v<aur>'], 'production': 's_bv<aur>'}, {'tokens': ['c<;xv>', 's'], 'production': 's_c<;xv>s'}, {'tokens': ['c', 'z'], 'production': 's_cz'}, {'tokens': ['c', 's'], 'production': 's_cs'}, {'tokens': ['c', 'o'], 'production': 's_co'}, {'tokens': ['b', 'tum'], 'production': 's_btum'}, {'tokens': ['b', 'o'], 'production': 's_bo'}, {'tokens': ['v'], 'production': 's_v<b>', 'next_class': 'b'}, {'prev_class': 'o', 'tokens': ['s'], 'production': 's_<o>s'}, {'tokens': ['c<h+wb>'], 'production': 's_c<h+wb>'}, {'tokens': ['c'], 'production': 's_c'}, {'tokens': ['s'], 'production': 's_(al)s', 'prev_tokens': ['al']}, {'tokens': ['o'], 'production': 's_o'}, {'tokens': ['z'], 'production': 's_z'}] {'v<aur>': ['tkn'], 'c<;xv>': ['tkn'], 'v_duii': ['tkn'], '<h+wb>': ['tkn'], 'al': ['tkn'], 'v<ii+o>': ['tkn'], 'v_e_fut': ['tkn'], 'v<ii+z>': ['tkn'], 'o': ['tkn', 'o'], 'c': ['tkn', 'c'], 'b': ['tkn', 'b'], 'c<h+wb>': ['tkn'], 'v_o_fut': ['tkn'], 'pahun': ['tkn'], 'ko_ii': ['tkn'], 'n': ['tkn'], 's': ['tkn'], 'v': ['tkn'], 'z': ['tkn', 'z'], 'kyaa': ['tkn'], 'tum': 
['tkn']}
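# to make the pattern above concrete, take one of the more constrained rules in the dump and read
# it field by field (example_rule is just a local name; the checks themselves are implemented in
# the match_rule function further down):
example_rule = {'prev_class': 'b', 'prev_tokens': ['c', 'v'], 'tokens': ['c', 'c'],
                'next_class': 'b', 'production': 's_(<b>cv)cc<b>'}
# prev_class 'b'         : the token just before prev_tokens must belong to class b
# prev_tokens ['c', 'v'] : the two tokens immediately before the matched tokens must be c, v
# tokens ['c', 'c']      : the tokens the rule itself consumes
# next_class 'b'         : the token right after the matched tokens must belong to class b
# production             : the name emitted when the rule fires
print example_rule in p.rules   # should print True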
# set up the DiGraph
DG = nx.DiGraph()
DG.add_node(0,token=None)
for rule in p.rules[0:15]:
    if rule['tokens'][0] == 'b':
        print rule
{'tokens': ['b', 'c', 'v', 'n'], 'production': 's_bcvn<b>', 'next_class': 'b'}
{'tokens': ['b', 'c', 's', 'c<h+wb>'], 'production': 's_bcsc<h+wb>'}
{'tokens': ['b', 'c', 's', 'n'], 'production': 's_bcsn'}
{'tokens': ['b', 'c', 'v'], 'production': 's_bcv<z>', 'next_class': 'z'}
{'tokens': ['b', 'c', 'v'], 'production': 's_bcv<b>', 'next_class': 'b'}
{'tokens': ['b', 'c', 'v'], 'production': 's_bcv<o>', 'next_class': 'o'}
{'tokens': ['b', 'c', 'v_duii'], 'production': 's_bcv_duii'}
{'tokens': ['b', 'c', 'v<aa+z>'], 'production': 's_bcv<aa+z>'}
{'tokens': ['b', 'c', 's'], 'production': 's_bcs'}
{'tokens': ['b', 'c', 'v<ii+z>'], 'production': 's_bcv<ii+z>'}
# switch to named tuples
from collections import namedtuple
ParserRule = namedtuple('ParserRule', ['production','prev_class','prev_tokens','tokens','next_tokens','next_class'])
new_rules = []
for r in p.rules:
    pr = ParserRule(production=r['production'],
                    prev_class=r.get('prev_class'),
                    prev_tokens=r.get('prev_tokens'),
                    tokens=r['tokens'],
                    next_tokens=r.get('next_tokens'),
                    next_class=r.get('next_class'))
    new_rules.append(pr)
new_rules = tuple(new_rules)
new_rules
(ParserRule(production='s_bcvn<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v', 'n'], next_tokens=None, next_class='b'), ParserRule(production='s_bcsc<h+wb>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'c<h+wb>'], next_tokens=None, next_class=None), ParserRule(production='s_bcsn', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'n'], next_tokens=None, next_class=None), ParserRule(production='s_bcv<z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='z'), ParserRule(production='s_bcv<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='b'), ParserRule(production='s_bcv<o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='o'), ParserRule(production='s_cvn<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v', 'n'], next_tokens=None, next_class='b'), ParserRule(production='s_bcv_duii', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_duii'], next_tokens=None, next_class=None), ParserRule(production='s_bcv<aa+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<aa+z>'], next_tokens=None, next_class=None), ParserRule(production='s_cbo', prev_class=None, prev_tokens=None, tokens=['c', 'b', 'o'], next_tokens=None, next_class=None), ParserRule(production='s_cbs', prev_class=None, prev_tokens=None, tokens=['c', 'b', 's'], next_tokens=None, next_class=None), ParserRule(production='s_csc<h+wb>', prev_class=None, prev_tokens=None, tokens=['c', 's', 'c<h+wb>'], next_tokens=None, next_class=None), ParserRule(production='s_csn', prev_class=None, prev_tokens=None, tokens=['c', 's', 'n'], next_tokens=None, next_class=None), ParserRule(production='s_bcs', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's'], next_tokens=None, next_class=None), ParserRule(production='s_bcv<ii+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+z>'], next_tokens=None, next_class=None), ParserRule(production='s_bcv<ii+o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+o>'], next_tokens=None, next_class=None), ParserRule(production='s_bsn', prev_class=None, prev_tokens=None, tokens=['b', 's', 'n'], next_tokens=None, next_class=None), ParserRule(production='s_cv_e_futn', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut', 'n'], next_tokens=None, next_class=None), ParserRule(production='s_bcv_o_fut', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_o_fut'], next_tokens=None, next_class=None), ParserRule(production='s_bc<;xv>s', prev_class=None, prev_tokens=None, tokens=['b', 'c<;xv>', 's'], next_tokens=None, next_class=None), ParserRule(production='s_(<b>cv)cc<b>', prev_class='b', prev_tokens=['c', 'v'], tokens=['c', 'c'], next_tokens=None, next_class='b'), ParserRule(production='s_<v>cc<b>', prev_class='v', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b'), ParserRule(production='s_<c>cc<b>', prev_class='c', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b'), ParserRule(production='s_<b>cc<b>', prev_class='b', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b'), ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z'), ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b'), ParserRule(production='s_<b>cs(bal)', prev_class='b', prev_tokens=None, tokens=['c', 's'], next_tokens=['b', 'al'], next_class=None), 
ParserRule(production='s_bc<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c'], next_tokens=None, next_class='b'), ParserRule(production='s_cv_e_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut'], next_tokens=None, next_class=None), ParserRule(production='s_v<ii+z>z', prev_class=None, prev_tokens=None, tokens=['v<ii+z>', 'z'], next_tokens=None, next_class=None), ParserRule(production='s_cv_o_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_o_fut'], next_tokens=None, next_class=None), ParserRule(production='s_bs', prev_class=None, prev_tokens=None, tokens=['b', 's'], next_tokens=None, next_class=None), ParserRule(production='s_cv<ii+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+z>'], next_tokens=None, next_class=None), ParserRule(production='s_cv<ii+o>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+o>'], next_tokens=None, next_class=None), ParserRule(production='s_bko_ii', prev_class=None, prev_tokens=None, tokens=['b', 'ko_ii'], next_tokens=None, next_class=None), ParserRule(production='s_cv<aa+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<aa+z>'], next_tokens=None, next_class=None), ParserRule(production='s_c<h+wb>o', prev_class=None, prev_tokens=None, tokens=['c<h+wb>', 'o'], next_tokens=None, next_class=None), ParserRule(production='s_bv<aur>', prev_class=None, prev_tokens=None, tokens=['b', 'v<aur>'], next_tokens=None, next_class=None), ParserRule(production='s_c<;xv>s', prev_class=None, prev_tokens=None, tokens=['c<;xv>', 's'], next_tokens=None, next_class=None), ParserRule(production='s_cz', prev_class=None, prev_tokens=None, tokens=['c', 'z'], next_tokens=None, next_class=None), ParserRule(production='s_cs', prev_class=None, prev_tokens=None, tokens=['c', 's'], next_tokens=None, next_class=None), ParserRule(production='s_co', prev_class=None, prev_tokens=None, tokens=['c', 'o'], next_tokens=None, next_class=None), ParserRule(production='s_btum', prev_class=None, prev_tokens=None, tokens=['b', 'tum'], next_tokens=None, next_class=None), ParserRule(production='s_bo', prev_class=None, prev_tokens=None, tokens=['b', 'o'], next_tokens=None, next_class=None), ParserRule(production='s_v<b>', prev_class=None, prev_tokens=None, tokens=['v'], next_tokens=None, next_class='b'), ParserRule(production='s_<o>s', prev_class='o', prev_tokens=None, tokens=['s'], next_tokens=None, next_class=None), ParserRule(production='s_c<h+wb>', prev_class=None, prev_tokens=None, tokens=['c<h+wb>'], next_tokens=None, next_class=None), ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None), ParserRule(production='s_(al)s', prev_class=None, prev_tokens=['al'], tokens=['s'], next_tokens=None, next_class=None), ParserRule(production='s_o', prev_class=None, prev_tokens=None, tokens=['o'], next_tokens=None, next_class=None), ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=['z'], next_tokens=None, next_class=None))
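# a sketch of a slightly more compact conversion (defaults and alt_rules are my own names, not
# used elsewhere): merge each YAML dict over a dict of None defaults and splat it into ParserRule
defaults = dict(prev_class=None, prev_tokens=None, next_tokens=None, next_class=None)
alt_rules = tuple(ParserRule(**dict(defaults, **r)) for r in p.rules)
print alt_rules == new_rules   # should print True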
def found_token_at_node(t, n_id):
    for s in DG.successors(n_id):
        # print s, DG[s]
        if DG.node[s].get('token') == t:
            return s
    return None
def weight_of_rule(r):  # and here namedtuples get clunky?
    weight = 1  # start at one
    if r.prev_class:
        weight += 1
    if r.prev_tokens:
        weight += 1
    if r.next_class:
        weight += 1
    if r.next_tokens:
        weight += 1
    # when we navigate we will go from low to high edge weight, so we want the inverse of the
    # weight; float division here, since plain 1/weight would truncate to 0 in Python 2
    return 1.0 / weight
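# a quick check of the weighting, using rules already in new_rules (indices read off the dump
# above): no constraints keeps weight 1, one constraint gives 1/2, three constraints give 1/4
for r in (new_rules[1], new_rules[0], new_rules[20]):   # s_bcsc<h+wb>, s_bcvn<b>, s_(<b>cv)cc<b>
    print r.production, weight_of_rule(r)   # expect 1.0, then 0.5, then 0.25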
token_nodes = []
match_nodes = []
for rule in new_rules:
    curr_node = 0
    for t in rule.tokens:
        found = found_token_at_node(t, curr_node)
        if found:
            curr_node = found
            continue
        else:
            new_node = len(DG.nodes())
            DG.add_node(new_node, token=t)
            token_nodes.append(new_node)  # save in a list to be used in drawing for labels, color, etc.
            # print DG.node[new_node], t
            DG.add_edge(curr_node, new_node, weight=0)  # not a match edge, so no weight; it will be tried first
            curr_node = new_node
    rule_weight = weight_of_rule(rule)  # 1/4 for 4 constraints, 1/3 for 3 constraints, etc.
    new_match_node = len(DG.nodes())
    DG.add_node(new_match_node, rule=rule, found=rule.production)
    match_nodes.append(new_match_node)
    DG.add_edge(curr_node, new_match_node, weight=rule_weight, rule=rule)  # for now, just copy the rule onto the edge and check against it
    print DG[curr_node][new_match_node]
    print 'added rule', rule
{'weight': 0, 'rule': ParserRule(production='s_bcvn<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v', 'n'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_bcvn<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v', 'n'], next_tokens=None, next_class='b') {'weight': 1, 'rule': ParserRule(production='s_bcsc<h+wb>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'c<h+wb>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcsc<h+wb>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'c<h+wb>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcsn', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'n'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcsn', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's', 'n'], next_tokens=None, next_class=None) {'weight': 0, 'rule': ParserRule(production='s_bcv<z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='z')} added rule ParserRule(production='s_bcv<z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='z') {'weight': 0, 'rule': ParserRule(production='s_bcv<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_bcv<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_bcv<o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='o')} added rule ParserRule(production='s_bcv<o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v'], next_tokens=None, next_class='o') {'weight': 0, 'rule': ParserRule(production='s_cvn<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v', 'n'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_cvn<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v', 'n'], next_tokens=None, next_class='b') {'weight': 1, 'rule': ParserRule(production='s_bcv_duii', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_duii'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcv_duii', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_duii'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcv<aa+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<aa+z>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcv<aa+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<aa+z>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cbo', prev_class=None, prev_tokens=None, tokens=['c', 'b', 'o'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cbo', prev_class=None, prev_tokens=None, tokens=['c', 'b', 'o'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cbs', prev_class=None, prev_tokens=None, tokens=['c', 'b', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cbs', prev_class=None, prev_tokens=None, tokens=['c', 'b', 's'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_csc<h+wb>', prev_class=None, prev_tokens=None, tokens=['c', 's', 'c<h+wb>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_csc<h+wb>', prev_class=None, prev_tokens=None, tokens=['c', 's', 'c<h+wb>'], next_tokens=None, 
next_class=None) {'weight': 1, 'rule': ParserRule(production='s_csn', prev_class=None, prev_tokens=None, tokens=['c', 's', 'n'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_csn', prev_class=None, prev_tokens=None, tokens=['c', 's', 'n'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcs', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcs', prev_class=None, prev_tokens=None, tokens=['b', 'c', 's'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcv<ii+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+z>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcv<ii+z>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+z>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcv<ii+o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+o>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcv<ii+o>', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v<ii+o>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bsn', prev_class=None, prev_tokens=None, tokens=['b', 's', 'n'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bsn', prev_class=None, prev_tokens=None, tokens=['b', 's', 'n'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cv_e_futn', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut', 'n'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv_e_futn', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut', 'n'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bcv_o_fut', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_o_fut'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bcv_o_fut', prev_class=None, prev_tokens=None, tokens=['b', 'c', 'v_o_fut'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bc<;xv>s', prev_class=None, prev_tokens=None, tokens=['b', 'c<;xv>', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bc<;xv>s', prev_class=None, prev_tokens=None, tokens=['b', 'c<;xv>', 's'], next_tokens=None, next_class=None) {'weight': 0, 'rule': ParserRule(production='s_(<b>cv)cc<b>', prev_class='b', prev_tokens=['c', 'v'], tokens=['c', 'c'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_(<b>cv)cc<b>', prev_class='b', prev_tokens=['c', 'v'], tokens=['c', 'c'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_<v>cc<b>', prev_class='v', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_<v>cc<b>', prev_class='v', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_<c>cc<b>', prev_class='c', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_<c>cc<b>', prev_class='c', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_<b>cc<b>', prev_class='b', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_<b>cc<b>', prev_class='b', prev_tokens=None, tokens=['c', 'c'], next_tokens=None, next_class='b') 
{'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')} added rule ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z') {'weight': 0, 'rule': ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_<b>cs(bal)', prev_class='b', prev_tokens=None, tokens=['c', 's'], next_tokens=['b', 'al'], next_class=None)} added rule ParserRule(production='s_<b>cs(bal)', prev_class='b', prev_tokens=None, tokens=['c', 's'], next_tokens=['b', 'al'], next_class=None) {'weight': 0, 'rule': ParserRule(production='s_bc<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_bc<b>', prev_class=None, prev_tokens=None, tokens=['b', 'c'], next_tokens=None, next_class='b') {'weight': 1, 'rule': ParserRule(production='s_cv_e_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv_e_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_e_fut'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_v<ii+z>z', prev_class=None, prev_tokens=None, tokens=['v<ii+z>', 'z'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_v<ii+z>z', prev_class=None, prev_tokens=None, tokens=['v<ii+z>', 'z'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cv_o_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_o_fut'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv_o_fut', prev_class=None, prev_tokens=None, tokens=['c', 'v_o_fut'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bs', prev_class=None, prev_tokens=None, tokens=['b', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bs', prev_class=None, prev_tokens=None, tokens=['b', 's'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cv<ii+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+z>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv<ii+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+z>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cv<ii+o>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+o>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv<ii+o>', prev_class=None, prev_tokens=None, tokens=['c', 'v<ii+o>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bko_ii', prev_class=None, prev_tokens=None, tokens=['b', 'ko_ii'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bko_ii', prev_class=None, prev_tokens=None, tokens=['b', 'ko_ii'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cv<aa+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<aa+z>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cv<aa+z>', prev_class=None, prev_tokens=None, tokens=['c', 'v<aa+z>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_c<h+wb>o', prev_class=None, 
prev_tokens=None, tokens=['c<h+wb>', 'o'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_c<h+wb>o', prev_class=None, prev_tokens=None, tokens=['c<h+wb>', 'o'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bv<aur>', prev_class=None, prev_tokens=None, tokens=['b', 'v<aur>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bv<aur>', prev_class=None, prev_tokens=None, tokens=['b', 'v<aur>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_c<;xv>s', prev_class=None, prev_tokens=None, tokens=['c<;xv>', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_c<;xv>s', prev_class=None, prev_tokens=None, tokens=['c<;xv>', 's'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cz', prev_class=None, prev_tokens=None, tokens=['c', 'z'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cz', prev_class=None, prev_tokens=None, tokens=['c', 'z'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_cs', prev_class=None, prev_tokens=None, tokens=['c', 's'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_cs', prev_class=None, prev_tokens=None, tokens=['c', 's'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_co', prev_class=None, prev_tokens=None, tokens=['c', 'o'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_co', prev_class=None, prev_tokens=None, tokens=['c', 'o'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_btum', prev_class=None, prev_tokens=None, tokens=['b', 'tum'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_btum', prev_class=None, prev_tokens=None, tokens=['b', 'tum'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_bo', prev_class=None, prev_tokens=None, tokens=['b', 'o'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_bo', prev_class=None, prev_tokens=None, tokens=['b', 'o'], next_tokens=None, next_class=None) {'weight': 0, 'rule': ParserRule(production='s_v<b>', prev_class=None, prev_tokens=None, tokens=['v'], next_tokens=None, next_class='b')} added rule ParserRule(production='s_v<b>', prev_class=None, prev_tokens=None, tokens=['v'], next_tokens=None, next_class='b') {'weight': 0, 'rule': ParserRule(production='s_<o>s', prev_class='o', prev_tokens=None, tokens=['s'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_<o>s', prev_class='o', prev_tokens=None, tokens=['s'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_c<h+wb>', prev_class=None, prev_tokens=None, tokens=['c<h+wb>'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_c<h+wb>', prev_class=None, prev_tokens=None, tokens=['c<h+wb>'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None) {'weight': 0, 'rule': ParserRule(production='s_(al)s', prev_class=None, prev_tokens=['al'], tokens=['s'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_(al)s', prev_class=None, prev_tokens=['al'], tokens=['s'], next_tokens=None, next_class=None) {'weight': 1, 'rule': 
ParserRule(production='s_o', prev_class=None, prev_tokens=None, tokens=['o'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_o', prev_class=None, prev_tokens=None, tokens=['o'], next_tokens=None, next_class=None) {'weight': 1, 'rule': ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=['z'], next_tokens=None, next_class=None)} added rule ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=['z'], next_tokens=None, next_class=None)
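# the graph should now be a prefix trie over the rules' token sequences, with one extra match
# node per rule hanging off the end of that rule's token path; a quick size check:
print len(match_nodes) == len(new_rules)                          # one match node per rule
print len(DG.nodes()) == 1 + len(token_nodes) + len(match_nodes)  # root + token nodes + match nodes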
# now let's try it out
tkns = tuple(p.tokenize('cv'))
print tkns
('c', 'v')
# sort the edges by weight. This could be done across the entire graph up front to speed things up (see the sketch after the sorted list below)
edge_list = list(DG.edges(0, data = True))
edge_list.append( (0,6, {'weight':.5}) )
edge_list.append( (0,6, {'weight':.25}) )
edge_list
edge_list.sort(key=lambda x:x[2]['weight'])
edge_list
[(0, 96, {'weight': 0}), (0, 1, {'weight': 0}), (0, 98, {'weight': 0}), (0, 72, {'weight': 0}), (0, 77, {'weight': 0}), (0, 14, {'weight': 0}), (0, 89, {'weight': 0}), (0, 58, {'weight': 0}), (0, 91, {'weight': 0}), (0, 6, {'weight': 0.25}), (0, 6, {'weight': 0.5})]
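# a sketch of the speed-up mentioned above: sort every node's out-edges by weight once, up
# front, so the traversal below would not need to re-sort on each visit (sorted_edges is my own name)
sorted_edges = {}
for n in DG.nodes():
    sorted_edges[n] = sorted(DG.edges(n, data=True), key=lambda e: e[2]['weight'])
print sorted_edges[0][0]   # the lowest-weight edge out of the root node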
# this gnarly bit of code checks whether a rule matches a list of tokens at a particular token_i,
# after having already matched `level` tokens starting at token_i
parser_tokens = p.tokens
def match_rule(rule, tokens, token_i, level, blank='b'):
    # level shows how far we have moved forward from token_i
    print "trying rule ", rule
    (num_prev_tokens, num_next_tokens) = (0, 0)
    if rule.prev_tokens:
        num_prev_tokens = len(rule.prev_tokens)
        start_i = token_i - len(rule.prev_tokens)
        if start_i < 0:
            return False
        for i in range(num_prev_tokens):  # had some trouble with all() here for some reason
            if tokens[start_i + i] != rule.prev_tokens[i]:
                return False
    if rule.prev_class:
        print 'in prev_class'
        start_i = token_i - num_prev_tokens - 1
        prev_token = ''
        if start_i < -1:
            return False
        if start_i == -1:  # if just one position before the start, send a 'blank'
            prev_token = blank
        else:
            prev_token = tokens[start_i]
        if rule.prev_class not in parser_tokens[prev_token]:
            return False
    if rule.next_tokens:
        num_next_tokens = len(rule.next_tokens)
        start_i = token_i + level
        if start_i + num_next_tokens > len(tokens):
            print 'too long'
            return False
        for i in range(num_next_tokens):
            if tokens[start_i + i] != rule.next_tokens[i]:
                return False
    if rule.next_class:
        start_i = token_i + level + num_next_tokens
        if start_i == len(tokens):  # if one past the end
            next_token = blank
        else:
            next_token = tokens[start_i]
        if rule.next_class not in parser_tokens[next_token]:
            return False
    # if start_i + num_next_tokens == len(tokens): need to deal with this one, but can in next_class
    return True
r=ParserRule(production='s_z', prev_class=None, prev_tokens=('b','c'), tokens=('z'), next_tokens=None, next_class=None)
ts = ['a','c','z']
print match_rule(r,ts,2,1)
ts = ['b','c','z']
print match_rule(r,ts,2,1)
ts = ['b','c','c']
r=ParserRule(production='s_z', prev_class='b', prev_tokens=None, tokens=('c','c'), next_tokens=None, next_class=None)
print match_rule(r,ts,1,1)
ts = ['v','z','c','c']
r=ParserRule(production='s_z', prev_class='b', prev_tokens=('z'), tokens=('c','c'), next_tokens=None, next_class=None)
print match_rule(r,ts,2,1)
ts = ['a','b','c','d']
r=ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=('b','c'), next_tokens=['d'], next_class=None)
print match_rule(r,ts,1,2)
ts = ['a','b','c','d','b']
r=ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=('b','c'), next_tokens=['d'], next_class='c')
print match_rule(r,ts,1,2)
trying rule ParserRule(production='s_z', prev_class=None, prev_tokens=('b', 'c'), tokens='z', next_tokens=None, next_class=None)
False
trying rule ParserRule(production='s_z', prev_class=None, prev_tokens=('b', 'c'), tokens='z', next_tokens=None, next_class=None)
True
trying rule ParserRule(production='s_z', prev_class='b', prev_tokens=None, tokens=('c', 'c'), next_tokens=None, next_class=None)
in prev_class
True
trying rule ParserRule(production='s_z', prev_class='b', prev_tokens='z', tokens=('c', 'c'), next_tokens=None, next_class=None)
in prev_class
False
trying rule ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=('b', 'c'), next_tokens=['d'], next_class=None)
True
trying rule ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=('b', 'c'), next_tokens=['d'], next_class='c')
False
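# as a further check, the blank='b' default handles the edges of the sequence: one position
# before token 0, or one past the end, is treated as a blank 'b' token, so a rule such as
# s_<b>cc<b> from the grammar can match a bare ['c', 'c'] sequence
ts = ['c', 'c']
r = ParserRule(production='s_<b>cc<b>', prev_class='b', prev_tokens=None,
               tokens=('c', 'c'), next_tokens=None, next_class='b')
print match_rule(r, ts, 0, 2)   # expect True: blank 'b' on both sides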
def graph_parse_match_first_at(tokens, token_i):
    print "trying tokens ", tokens, " at token_i", token_i
    # level tracks how far along we have matched; it increases each time a token is found
    def descend_node(curr_node, level):
        print "descending ", curr_node, " at level ", level
        # edges are sorted by weight, so the most constrained rules are tried first
        for edge in sorted(list(DG.edges(curr_node, data=True)), key=lambda x: x[2]['weight']):
            print 'at edge', edge
            next_node = edge[1]
            print edge[2]
            if edge[2].get('rule'):  # if the edge carries a rule, try to match it
                if match_rule(edge[2]['rule'], tokens, token_i, level):
                    return "MADE IT!", edge[1], edge[2]
            else:
                print tokens, token_i, level
                if token_i + level < len(tokens):
                    if DG.node[next_node].get('token') == tokens[token_i + level]:
                        print 'Found successor ', next_node, DG.node[next_node]
                        return descend_node(next_node, level + 1)
        # no matches
    return descend_node(0, 0)
print tkns
print graph_parse_match_first_at(tkns,0)
#
('c', 'v') trying tokens ('c', 'v') at token_i 0 descending 0 at level 0 at edge (0, 96, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 at edge (0, 1, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 at edge (0, 98, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 at edge (0, 72, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 at edge (0, 77, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 at edge (0, 14, {'weight': 0}) {'weight': 0} ('c', 'v') 0 0 Found successor 14 {'token': 'c'} descending 14 at level 1 at edge (14, 64, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 at edge (14, 80, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 at edge (14, 66, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 at edge (14, 70, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 at edge (14, 40, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 at edge (14, 15, {'weight': 0}) {'weight': 0} ('c', 'v') 0 1 Found successor 15 {'token': 'v'} descending 15 at level 2 at edge (15, 16, {'weight': 0}) {'weight': 0} ('c', 'v') 0 2 at edge (15, 53, {'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')}) {'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')} trying rule ParserRule(production='s_z', prev_class=None, prev_tokens=None, tokens=['z'], next_tokens=None, next_class=None) ('MADE IT!', 53, {'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')})
def graph_parse_match_all_at(tokens, token_i):
    print "trying tokens ", tokens, " at token_i", token_i
    matches = []
    # level tracks how far along we have matched; it increases each time a token is found
    def match_rule(rule, level):
        # a local stub for now that shadows the full match_rule above: report the rule and accept it
        print "trying rule ", rule
        return True
    def descend_node(curr_node, level):
        print "descending ", curr_node, " at level ", level
        for edge in sorted(list(DG.edges(curr_node, data=True)), key=lambda x: x[2]['weight']):
            next_node = edge[1]
            print 'at edge', edge, ' == > ', DG.node[next_node]
            if edge[2].get('rule'):
                print "...found rule"
                if match_rule(edge[2]['rule'], level):
                    match = "MADE IT!", edge[1], edge[2]
                    matches.append(match)
                # keep going: unlike the first-match version, we do not return here
            else:
                if token_i + level < len(tokens):
                    if DG.node[next_node].get('token') == tokens[token_i + level]:
                        # print 'Found successor ', next_node, DG.node[next_node]
                        descend_node(next_node, level + 1)
    descend_node(0, 0)
    return matches
print DG.successors(0)
print graph_parse_match_all_at(tkns,0)
for n in DG.successors(15):
    print DG.node[n]
[96, 1, 98, 72, 77, 14, 89, 58, 91] trying tokens ('c', 'v') at token_i 0 descending 0 at level 0 at edge (0, 96, {'weight': 0}) == > {'token': 'o'} at edge (0, 1, {'weight': 0}) == > {'token': 'b'} at edge (0, 98, {'weight': 0}) == > {'token': 'z'} at edge (0, 72, {'weight': 0}) == > {'token': 'c<h+wb>'} at edge (0, 77, {'weight': 0}) == > {'token': 'c<;xv>'} at edge (0, 14, {'weight': 0}) == > {'token': 'c'} descending 14 at level 1 at edge (14, 64, {'weight': 0}) == > {'token': 'v<ii+z>'} at edge (14, 80, {'weight': 0}) == > {'token': 'z'} at edge (14, 66, {'weight': 0}) == > {'token': 'v<ii+o>'} at edge (14, 70, {'weight': 0}) == > {'token': 'v<aa+z>'} at edge (14, 40, {'weight': 0}) == > {'token': 'v_e_fut'} at edge (14, 15, {'weight': 0}) == > {'token': 'v'} descending 15 at level 2 at edge (15, 16, {'weight': 0}) == > {'token': 'n'} at edge (15, 53, {'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')}) == > {'found': 's_cv<z>', 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')} ...found rule trying rule ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z') at edge (15, 54, {'weight': 0, 'rule': ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b')}) == > {'found': 's_cv<b>', 'rule': ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b')} ...found rule trying rule ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b') at edge (14, 48, {'weight': 0}) == > {'token': 'c'} at edge (14, 83, {'weight': 0}) == > {'token': 'o'} at edge (14, 22, {'weight': 0}) == > {'token': 'b'} at edge (14, 27, {'weight': 0}) == > {'token': 's'} at edge (14, 61, {'weight': 0}) == > {'token': 'v_o_fut'} at edge (14, 94, {'weight': 1, 'rule': ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None)}) == > {'found': 's_c', 'rule': ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None)} ...found rule trying rule ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None) at edge (0, 89, {'weight': 0}) == > {'token': 'v'} at edge (0, 58, {'weight': 0}) == > {'token': 'v<ii+z>'} at edge (0, 91, {'weight': 0}) == > {'token': 's'} [('MADE IT!', 53, {'weight': 0, 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')}), ('MADE IT!', 54, {'weight': 0, 'rule': ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b')}), ('MADE IT!', 94, {'weight': 1, 'rule': ParserRule(production='s_c', prev_class=None, prev_tokens=None, tokens=['c'], next_tokens=None, next_class=None)})] {'token': 'n'} {'found': 's_cv<z>', 'rule': ParserRule(production='s_cv<z>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='z')} {'found': 's_cv<b>', 'rule': ParserRule(production='s_cv<b>', prev_class=None, prev_tokens=None, tokens=['c', 'v'], next_tokens=None, next_class='b')}
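# a rough sketch of how the matcher could be driven across a whole token sequence: at each
# position collect the candidate matches, take one (here just the first), and jump ahead by
# the length of its rule's tokens -- greedy and without backtracking, purely an illustration
# (greedy_parse is my own name, not part of the Parser class)
def greedy_parse(tokens):
    token_i = 0
    productions = []
    while token_i < len(tokens):
        matches = graph_parse_match_all_at(tokens, token_i)
        if not matches:
            return None   # no rule covers this position
        rule = matches[0][2]['rule']
        productions.append(rule.production)
        token_i += len(rule.tokens)
    return productions
print greedy_parse(tkns)   # for ('c', 'v') this takes the first match, s_cv<z>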
def draw_parser_graph(g):
    import matplotlib.pyplot as plt
    pos = nx.spring_layout(g)
    plt.figure(figsize=(15, 15))
    # label token nodes with their token and match nodes with the production they found
    labels = {}
    for n, d in g.nodes(data=True):
        label = ''
        if d.get('token'):
            label = d['token']
        elif d.get('found'):
            label = d['found']
        labels[n] = label
    # node_color wants one color per node, in g.nodes() order
    colors = []
    for n, d in g.nodes(data=True):
        if d.get('token'):
            colors.append('#A0CBE2')   # token nodes
        elif d.get('found'):
            colors.append('#E2CBA0')   # match (rule) nodes
        else:
            colors.append('#2CBACB')   # the root node
    nx.draw(g, pos, labels=labels, node_color=colors, node_size=400)
draw_parser_graph(DG)  # T-Shirts?