from difflib import SequenceMatcher
# Two short sample sentences to compare character by character.
a = 'ik ben die ik ben geweest'
b = 'ik was die ik gister.'

# Hand both sequences to the matcher at construction time.
# isjunk=None treats no character as junk; autojunk=False switches off
# difflib's automatic junk heuristic.
df = SequenceMatcher(isjunk=None, a=a, b=b, autojunk=False)

# Triples (start in a, start in b, length) of the common stretches; the list
# always ends with a zero-length sentinel at (len(a), len(b)).
mb = df.get_matching_blocks()
mb
[Match(a=0, b=0, size=3), Match(a=6, b=6, size=8), Match(a=18, b=14, size=1), Match(a=23, b=16, size=2), Match(a=25, b=21, size=0)]
# Print every matching block on its own line as
# "<start in a> <start in b> <length>".
for ai, bi, n in mb:
    print('{} {} {}'.format(ai, bi, n))
0 0 3 6 6 8 18 14 1 23 16 2 25 21 0
# Render both strings with colour tags that show how `a` turns into `b`:
#   equal   -> copied unchanged into both renderings
#   delete  -> only in `a`, wrapped in <red>
#   insert  -> only in `b`, wrapped in <green>
#   replace -> present in both but different, wrapped in <yellow> on each side
opc = df.get_opcodes()

# Collect the pieces in lists and join once at the end instead of growing
# the strings with repeated +=.
a_parts = []
b_parts = []
for lb, ai, aj, bi, bj in opc:
    a_chunk = a[ai:aj]
    b_chunk = b[bi:bj]
    if lb == 'equal':
        a_parts.append(a_chunk)
        b_parts.append(b_chunk)
    elif lb == 'delete':
        a_parts.append('<red>' + a_chunk + '</red>')
    elif lb == 'insert':
        b_parts.append('<green>' + b_chunk + '</green>')
    else:
        # The only remaining opcode tag is 'replace'.
        a_parts.append('<yellow>' + a_chunk + '</yellow>')
        b_parts.append('<yellow>' + b_chunk + '</yellow>')

arep = ''.join(a_parts)
brep = ''.join(b_parts)
print(arep)
print(brep)
ik <yellow>ben</yellow> die ik <red>ben </red>g<yellow>ewee</yellow>st ik <yellow>was</yellow> die ik g<yellow>i</yellow>st<green>er.</green>
# Notebook display: echo the raw opcode list (has no effect when run as a plain script).
opc
[('equal', 0, 3, 0, 3), ('replace', 3, 6, 3, 6), ('equal', 6, 14, 6, 14), ('delete', 14, 18, 14, 14), ('equal', 18, 19, 14, 15), ('replace', 19, 23, 15, 16), ('equal', 23, 25, 16, 18), ('insert', 25, 25, 18, 21)]
import laf
from laf.fabric import LafFabric
from etcbc.preprocess import prepare
# Create the LAF-Fabric processor object; the data itself is loaded later through fabric.load().
fabric = LafFabric()
0.00s This is LAF-Fabric 4.5.3 API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html Feature doc: http://shebanq-doc.readthedocs.org/en/latest/texts/welcome.html
# Data version; it is appended to 'etcbc' to form the source name.
version = '4b'
source_name = 'etcbc{}'.format(version)

# What to load: no XML ids, no primary data, and only the `otype` feature
# (the first element of the features pair; the second is left empty).
load_spec = {
    "xmlids": {"node": False, "edge": False},
    "features": (
        '''
otype
''',
        '',
    ),
    "primary": False,
}

# Positional arguments are source, annox ('--') and task name ('parallel'),
# matching the "DATA LOADED FROM SOURCE ... AND ANNOX ... FOR TASK ..." log line.
API = fabric.load(source_name, '--', 'parallel', load_spec, verbose='NORMAL')

# Execute the code template supplied by LAF-Fabric in this namespace.
# NOTE(review): presumably this is what defines the short API names such as
# `F` used further down; confirm against the LAF-Fabric API reference.
exec(fabric.localnames.format(var='fabric'))
0.00s LOADING API: please wait ... 0.00s INFO: USING DATA COMPILED AT: 2015-06-29T05-30-49 0.01s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX -- FOR TASK parallel AT 2015-07-18T08-15-10
# Count how many items F.otype.s() yields for each object type, streaming
# through the results instead of materialising them in a list first.
ns, nh, nv = (
    sum(1 for _ in F.otype.s(otype))
    for otype in ('sentence', 'half_verse', 'verse')
)

# Number of unordered pairs, n * (n - 1) / 2, for each count. True division
# is kept on purpose, so the pair counts are printed as floats.
pair_counts = [n * (n - 1) / 2 for n in (ns, nh, nv)]

report = '''
Sentences : {:>10} : {:>10}
Half verses: {:>10} : {:>10}
Verses : {:>10} : {:>10}
'''
print(report.format(
    ns, pair_counts[0],
    nh, pair_counts[1],
    nv, pair_counts[2],
))
Sentences : 64125 : 2055975750.0 Half verses: 45087 : 1016396241.0 Verses : 23213 : 269410078.0