import sys
import collections
import re
import unicodedata
from IPython.display import clear_output, display, HTML
from laf.fabric import LafFabric
fabric3 = LafFabric()
fabric4 = LafFabric()
from etcbc.lib import Transcription
from etcbc.preprocess import prepare
tr = Transcription()
0.00s This is LAF-Fabric 4.4.1 http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html 0.00s This is LAF-Fabric 4.4.1 http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html
def _monads_task_options(feature_spec):
    """Build the load directives for the 'monads' task.

    `feature_spec` is the whitespace-separated list of node features to
    load; no edge features are requested (second tuple member is empty).
    """
    return {
        "xmlids": {"node": False, "edge": False},
        "features": (feature_spec, ''),
        "primary": False,
        "prepare": prepare,
    }


# Load the two data versions side by side; each gets its own feature list.
API3 = fabric3.load('etcbc3', '--', 'monads', _monads_task_options('''
otype monads text suffix
graphical_word
verse_label
'''))
API4 = fabric4.load('etcbc4', '--', 'monads', _monads_task_options('''
otype monads g_word_utf8 trailer_utf8
g_word
label
'''))
0.00s LOADING API: please wait ... 0.01s INFO: USING DATA COMPILED AT: 2014-06-27T12-21-04 2.66s LOGFILE=/Users/dirk/laf-fabric-output/etcbc3/monads/__log__monads.txt 3.18s INFO: DATA LOADED FROM SOURCE etcbc3 AND ANNOX -- FOR TASK monads AT 2014-07-16T09-47-51 0.00s LOADING API: please wait ... 0.00s INFO: USING DATA COMPILED AT: 2014-07-14T16-45-08 3.17s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4/monads/__log__monads.txt 3.88s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX -- FOR TASK monads AT 2014-07-16T09-47-55
# Manual split points, keyed by (version, verse label, word number).
# The value is the character offset at which the text component that would
# receive that word number is cut in two; the part before the offset keeps
# the word number, the remainder becomes the next word.  An offset of 0
# therefore inserts an empty word in front of the component.
splits = {
    (3, ' JES 19,18', 21): 2,
    (3, ' DAN 01,02', 11): 2,
    (3, ' DAN 01,05', 25): 3,
    (3, ' DAN 01,15', 2): 2,
    (3, ' DAN 01,18', 3): 2,
    (3, ' NEH 07,69', 2): 2,
    (3, ' ICHR27,12', 15): 0,
}


def monad_passage_index(API, vr, vlabel_name, graphical_name, text_name, trailer_name):
    """Index the words of one data version by verse label and word number.

    Walks all nodes delivered by ``API['NN']()``.  For every 'word' node the
    text feature and trailer feature (newlines removed) are concatenated and
    cut into components, each ending just after a maqaf (U+05BE) or a space.
    Every component gets the next word number within the current verse; the
    module-level ``splits`` table can cut a component further.

    Parameters:
        API: mapping providing 'NN' (node iterator factory), 'F' (feature
            access, used as ``F.otype``, ``F.monads`` and ``F.item[name]``)
            and 'msg' (progress reporter).
        vr: version number used as first member of the ``splits`` keys.
        vlabel_name: name of the verse-label feature (read on 'verse' nodes).
        graphical_name: name of the transliteration feature.
        text_name: name of the word text feature.
        trailer_name: name of the feature holding the material after a word.

    Returns:
        ``(mp_index, np_index)``: two ordered dicts.  ``mp_index`` maps
        ``(verse label, word number)`` to ``(node, monads value,
        transliteration, text component)``; ``np_index`` maps a verse label
        to the tuple of its word nodes.
    """
    mp_index = collections.OrderedDict()
    np_index = collections.OrderedDict()
    cur_label = None
    cur_wn = None
    cur_nodes = None
    NN = API['NN']
    F = API['F']
    msg = API['msg']
    chunk = 50000  # report progress every `chunk` word nodes
    i = 0
    ci = 0
    for n in NN():
        otype = F.otype.v(n)
        if otype == 'verse':
            # Flush the word nodes collected for the previous verse.
            if cur_nodes is not None:
                np_index[cur_label] = tuple(cur_nodes)
            cur_label = F.item[vlabel_name].v(n)
            cur_nodes = []
            cur_wn = 0
        elif otype == 'word':
            i += 1
            ci += 1
            if ci == chunk:
                ci = 0
                msg("{} words".format(i))
            m = F.monads.v(n)
            cur_nodes.append(n)
            translit = F.item[graphical_name].v(n)
            text = F.item[text_name].v(n) + F.item[trailer_name].v(n).replace('\n', '')
            if len(text) == 0:
                # A word node without any text still occupies a word number.
                cur_wn += 1
                mp_index[(cur_label, cur_wn)] = (n, m, translit, '')
                continue
            start = 0
            while start < len(text):
                # End the component just after the nearest maqaf or space,
                # or at the end of the text when neither occurs.
                s_maqef = text.find('\u05BE', start)
                s_space = text.find(' ', start)
                s_min = min(
                    s_maqef if s_maqef >= 0 else len(text) - 1,
                    s_space if s_space >= 0 else len(text) - 1,
                ) + 1
                remainder = text[start:s_min]
                start = s_min
                while remainder is not None:
                    cur_wn += 1
                    spos = splits.get((vr, cur_label, cur_wn), None)
                    if spos is not None:
                        # Cut what is left of the component; the tail is
                        # handed to the next word number (previously the
                        # whole component was reused here).
                        do_comp = remainder[0:spos]
                        remainder = remainder[spos:]
                    else:
                        do_comp = remainder
                        remainder = None
                    mp_index[(cur_label, cur_wn)] = (n, m, translit, do_comp)
    # Flush the last verse.
    if cur_nodes is not None:
        np_index[cur_label] = tuple(cur_nodes)
    msg("{} words".format(i))
    return (mp_index, np_index)
# Build the (label, word number) index and the per-verse node index for
# each version, naming the features that play each role in that version.
API3['msg']("Making index for ETCBC3")
etcbc3, etcbc3n = monad_passage_index(
    API3,
    3,
    vlabel_name='verse_label',
    graphical_name='graphical_word',
    text_name='text',
    trailer_name='suffix',
)
API3['msg']("Done")

API4['msg']("Making index for ETCBC4")
etcbc4, etcbc4n = monad_passage_index(
    API4,
    4,
    vlabel_name='label',
    graphical_name='g_word',
    text_name='g_word_utf8',
    trailer_name='trailer_utf8',
)
API4['msg']("Done")
17s Making index for ETCBC3 18s 50000 words 19s 100000 words 20s 150000 words 21s 200000 words 22s 250000 words 23s 300000 words 25s 350000 words 26s 400000 words 27s 426499 words 27s Done 23s Making index for ETCBC4 24s 50000 words 25s 100000 words 26s 150000 words 27s 200000 words 28s 250000 words 29s 300000 words 30s 350000 words 31s 400000 words 32s 426555 words 32s Done
# Opening of a standalone HTML page, including the style sheet shared by
# all generated tables (.heb* = Hebrew cells, .gw = transliteration cells,
# .hdiv = highlighted/differing cells, .casehd = case heading).
show_case_init = '''<html><head><style type="text/css">
.casehd {border-top: 4pt solid black; text-align: center; font-size: 24pt; height: 28pt; font-weight: bold; color: #00B060;}
.heb {padding-right: 12pt; text-align: right; font-family: SBL Hebrew; font-size: 32pt; height: 40pt;}
.heb1 {padding-right: 12pt;}
.heb2 {text-align: right; font-family: SBL Hebrew; font-size: 32pt; line-height: 40pt}
.gw {padding-right: 12pt; text-align: right; font-weight: bold; font-size: 18pt; height: 20pt;}
.hdiv {background-color: yellow;}
body {margin-left: 2em; margin-right: 2em; margin-top: 2em; margin-bottom: 2em;}
td {padding: 2pt;}
th {padding: 6pt;}
</style></head><body>
'''
# Opening and closing tags of a bordered table.
show_case_table_init = '''<table rules="all" border="all">
'''
show_case_table_final = '''</table>
'''
# Closing of the HTML page.
show_case_final = '''</body></html>
'''
# Declarations for an inline style="..." attribute (see hebuni).  This is
# a bare declaration list, so it must not contain a closing brace.
heb_css = '''
text-align: right;
font-family: SBL Hebrew;
font-size: 32pt;
height: 40pt;
'''
def hebuni(heb):
    """Display `heb` in the notebook as a paragraph styled with `heb_css`."""
    markup = '''<p style="{}"> {}</p>'''.format(heb_css, heb)
    display(HTML(markup))
def check_dagesh_accent_vowel_pos(c, a):
    """Display how NFKC normalization reorders dagesh, vowel and accent.

    For every code point U+05B0..U+05BB (used as the vowel) one table row is
    shown containing the six orderings of dagesh (U+05BC), vowel and accent
    `a` after consonant `c`; each ordering is followed by its NFKC form in a
    highlighted cell.
    """
    dagesh = '\u05BC'
    cell_pair = '<td class="heb"> {}</td><td class="heb hdiv"> {}</td>'
    row_tpl = '<tr>' + (cell_pair * 6) + '</tr>'

    def orderings(vowel):
        return (
            c + dagesh + vowel + a,
            c + dagesh + a + vowel,
            c + a + dagesh + vowel,
            c + vowel + dagesh + a,
            c + vowel + a + dagesh,
            c + a + vowel + dagesh,
        )

    rows = []
    for code_point in range(0x5B0, 0x5BC):
        cells = []
        for sequence in orderings(chr(code_point)):
            cells.extend((sequence, unicodedata.normalize('NFKC', sequence)))
        rows.append(row_tpl.format(*cells))

    page = ''.join((
        show_case_init,
        show_case_table_init,
        '\n'.join(rows),
        show_case_table_final,
        show_case_final,
    ))
    display(HTML(page))
def check_accent_vowel_pos(c, a):
    """Display how NFKC normalization reorders a vowel and an accent.

    For every code point U+05B0..U+05BB (used as the vowel) one table row is
    shown with consonant `c` followed by vowel+accent and by accent+vowel;
    each ordering is followed by its NFKC form in a highlighted cell.
    """
    cell_pair = '<td class="heb"> {}</td><td class="heb hdiv"> {}</td>'
    row_tpl = '<tr>' + (cell_pair * 2) + '</tr>'

    rows = []
    for code_point in range(0x5B0, 0x5BC):
        vowel = chr(code_point)
        cells = []
        for sequence in (c + vowel + a, c + a + vowel):
            cells.extend((sequence, unicodedata.normalize('NFKC', sequence)))
        rows.append(row_tpl.format(*cells))

    page = ''.join((
        show_case_init,
        show_case_table_init,
        '\n'.join(rows),
        show_case_table_final,
        show_case_final,
    ))
    display(HTML(page))
def show_range(passage, wnums):
    """Display a run of ETCBC4 words as one unit and save it as HTML.

    The texts and transliterations of the words `wnums` in verse `passage`
    are concatenated, shown with a per-character breakdown (see show_heb),
    and the same page is written to the output file
    ``<passage stripped>-<word numbers>.html``.

    Parameters:
        passage: verse label, a key part of the `etcbc4` index.
        wnums: iterable of word numbers within that verse.

    Returns:
        'ERROR' when one of the words is not in `etcbc4` (a message is
        printed and nothing is displayed or written); otherwise None.
    """
    import html

    # Materialize: the numbers are needed for the lookup and for the title.
    wnums = list(wnums)
    nodes = []
    monads = []
    gw = ''
    heb = ''
    for wnum in wnums:
        if (passage, wnum) not in etcbc4:
            print("No word {} in passage {} in etcbc4".format(wnum, passage))
            return 'ERROR'
        # Index entries are (node, monads, transliteration, text); the two
        # numbers used to be swapped in the "monad ..., node ..." line.
        (node, monad, translit, text) = etcbc4[(passage, wnum)]
        nodes.append(node)
        monads.append(monad)
        gw += translit
        heb += text

    lus = show_heb(heb)
    wnumsrep = ', '.join(str(wnum) for wnum in wnums)
    msrep = ', '.join(str(x) for x in monads)
    nsrep = ', '.join(str(x) for x in nodes)
    rows = [
        '<tr><th colspan="3" class="casehd"><a name="case.{}.{}">{} words {}</a></th></tr>\n'.format(
            passage, wnumsrep, passage, wnumsrep,
        ),
        '<tr><th colspan="3">ETCBC4</th></tr>\n',
        '<tr><td colspan="3"><p>monad {}, node {}</p></td></tr>\n'.format(msrep, nsrep),
        '<tr><td colspan="3" class="heb"><p>{}</p></td></tr>\n'.format(heb),
        # The transliteration alphabet contains &, < and >, so it must be
        # escaped before it is embedded in HTML.
        '<tr><td colspan="3" class="gw"><p>{}</p></td></tr>\n'.format(
            html.escape(gw, quote=False),
        ),
    ]
    rows.extend('<tr>{}</tr>\n'.format(lu) for lu in lus)
    ht = '{}{}{}{}{}'.format(
        show_case_init,
        show_case_table_init,
        ''.join(rows),
        show_case_table_final,
        show_case_final,
    )
    display(HTML(ht))
    oht = API4['outfile']('{}-{}.html'.format(passage.strip(), wnumsrep))
    try:
        oht.write(ht)
    finally:
        # Release the handle even when writing fails.
        oht.close()
def show_heb(heb):
    """Return one HTML cell triple per character of the decomposed `heb`.

    `heb` is NFKD-normalized and passed through ``Transcription._decomp``.
    Each resulting character yields three ``<td>`` cells: its entry in
    ``tr.hebrew_mappingi`` (or the character itself when it has none), its
    code point as four hex digits, and its Unicode name without the
    'HEBREW ' prefix.
    """
    decomposed = Transcription._decomp(unicodedata.normalize('NFKD', heb))
    cells = []
    for ch in decomposed:
        if ch in tr.hebrew_mappingi:
            shown = tr.hebrew_mappingi[ch]
        else:
            shown = ch
        char_name = unicodedata.name(ch).replace('HEBREW ', '')
        cells.append('<td>{}</td><td>{:04X}</td><td>{}</td>'.format(shown, ord(ch), char_name))
    return cells
def show_hstring(heb):
    """Display `heb` followed by a code point listing of its characters.

    The string is shown as is (no normalization); below it every character
    is listed as ``XXXX=NAME`` with the 'HEBREW ' prefix removed from the
    Unicode name.
    """
    listing = []
    for ch in heb:
        char_name = unicodedata.name(ch).replace('HEBREW ', '')
        listing.append('{:04X}={}'.format(ord(ch), char_name))
    markup = '''<p class="heb2">{}</p><p>{}</p>'''.format(heb, '<br/>'.join(listing))
    display(HTML(markup))
def _show_case(passage, wnum):
    """Build the HTML table rows comparing one word in ETCBC3 and ETCBC4.

    Looks up ``(passage, wnum)`` in the `etcbc3` and `etcbc4` indexes and
    lays both entries out side by side: heading, monad/node numbers, text,
    transliteration, and a per-character breakdown (see show_heb) in which
    rows that differ between the versions get class "hdiv".

    Returns:
        The rows as one string (no surrounding ``<table>``), or the string
        'ERROR' when the word is missing in either version; a message is
        printed for every version that lacks it.
    """
    import html

    g_n = {}
    g_m = {}
    g_w = {}
    g_h = {}
    l_u = {}
    good = True
    for (vn, vindex) in (('3', etcbc3), ('4', etcbc4)):
        if (passage, wnum) not in vindex:
            print("No word {} in passage {} in etcbc{}".format(wnum, passage, vn))
            good = False
            continue
        # Index entries are (node, monads, transliteration, text).
        (g_n[vn], g_m[vn], g_w[vn], g_h[vn]) = vindex[(passage, wnum)]
        l_u[vn] = show_heb(g_h[vn])
    if not good:
        return 'ERROR'

    chars3 = l_u['3']
    chars4 = l_u['4']
    lx = max(len(chars3), len(chars4))
    ln = min(len(chars3), len(chars4))
    filler = '<td colspan="3"> </td>'
    rows = [
        '<tr><th colspan="6" class="casehd"><a name="case.{}.{}">{} word {}</a></th></tr>\n'.format(
            passage, wnum, passage, wnum,
        ),
        '<tr><th colspan="3">ETCBC3</th><th colspan="3">ETCBC4</th></tr>\n',
        '<tr><td colspan="3"><p>monad {}, node {}</p></td><td colspan="3"><p>monad {}, node {}</p></td></tr>\n'.format(
            g_m['3'], g_n['3'], g_m['4'], g_n['4'],
        ),
        '<tr><td colspan="3" class="heb"><p>{}</p></td><td colspan="3" class="heb"><p>{}</p></td></tr>\n'.format(
            g_h['3'], g_h['4'],
        ),
        # The transliteration alphabet contains &, < and >, so it must be
        # escaped before it is embedded in HTML.
        '<tr><td colspan="3" class="gw"><p>{}</p></td><td colspan="3" class="gw"><p>{}</p></td></tr>\n'.format(
            html.escape(g_w['3'], quote=False),
            html.escape(g_w['4'], quote=False),
        ),
    ]
    for r in range(lx):
        # Beyond the shorter breakdown a row differs by definition.
        differs = r >= ln or chars3[r] != chars4[r]
        rows.append('<tr{}>{}{}</tr>\n'.format(
            ' class="hdiv"' if differs else '',
            chars3[r] if r < len(chars3) else filler,
            chars4[r] if r < len(chars4) else filler,
        ))
    return ''.join(rows)
def show_case(passage, wnum):
    """Display the ETCBC3/ETCBC4 comparison of one word in the notebook.

    Wraps the rows produced by `_show_case` in the page and table
    boilerplate and renders the result.
    """
    pieces = (
        show_case_init,
        show_case_table_init,
        _show_case(passage, wnum),
        show_case_table_final,
        show_case_final,
    )
    display(HTML('{}{}{}{}{}'.format(*pieces)))
def write_cases(elist, oh):
    """Write one table with the comparison rows of all cases to `oh`.

    `elist` holds argument tuples for `_show_case`; `oh` only needs a
    ``write`` method.  Page boilerplate is left to the caller.
    """
    oh.write(show_case_table_init)
    for case in elist:
        rows = _show_case(*case)
        oh.write(rows)
    oh.write(show_case_table_final)
# Show both normalization tables for consonant U+05DE (mem) combined with
# accent U+0596 (tipeha).
check_dagesh_accent_vowel_pos('\u05DE', '\u0596')
check_accent_vowel_pos('\u05DE', '\u0596')
מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ | מְּ֖ |
מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ | מֱּ֖ |
מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ | מֲּ֖ |
מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ | מֳּ֖ |
מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ | מִּ֖ |
מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ | מֵּ֖ |
מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ | מֶּ֖ |
מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ | מַּ֖ |
מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ | מָּ֖ |
מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ | מֹּ֖ |
מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ | מֺּ֖ |
מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ | מֻּ֖ |
מְ֖ | מְ֖ | מְ֖ | מְ֖ |
מֱ֖ | מֱ֖ | מֱ֖ | מֱ֖ |
מֲ֖ | מֲ֖ | מֲ֖ | מֲ֖ |
מֳ֖ | מֳ֖ | מֳ֖ | מֳ֖ |
מִ֖ | מִ֖ | מִ֖ | מִ֖ |
מֵ֖ | מֵ֖ | מֵ֖ | מֵ֖ |
מֶ֖ | מֶ֖ | מֶ֖ | מֶ֖ |
מַ֖ | מַ֖ | מַ֖ | מַ֖ |
מָ֖ | מָ֖ | מָ֖ | מָ֖ |
מֹ֖ | מֹ֖ | מֹ֖ | מֹ֖ |
מֺ֖ | מֺ֖ | מֺ֖ | מֺ֖ |
מֻ֖ | מֻ֖ | מֻ֖ | מֻ֖ |
# Render U+05C6 (nun hafukha) followed by U+0307 (combining dot above).
hebuni('\u05C6\u0307')
׆̇
# Ad-hoc inspection: list the code points of two literal strings, of the
# stored ETCBC4 text of node 1186, and of node 76786 including its trailer
# (newlines removed).
F = API4['F']
show_hstring('אַ֚ף')
show_hstring(F.g_word_utf8.v(1186))
show_hstring('ה֠וּא')
n = 76786
show_hstring(F.g_word_utf8.v(n) + F.trailer_utf8.v(n).replace('\n', ''))
אַ֚ף
05D0=LETTER ALEF
05B7=POINT PATAH
059A=ACCENT YETIV
05E3=LETTER FINAL PE
אַ֚ף
05D0=LETTER ALEF
05B7=POINT PATAH
059A=ACCENT YETIV
05E3=LETTER FINAL PE
ה֠וּא
05D4=LETTER HE
05A0=ACCENT TELISHA GEDOLA
05D5=LETTER VAV
05BC=POINT DAGESH OR MAPIQ
05D0=LETTER ALEF
יִשְׂרָאֵֽל׃ ׆̇ פ
05D9=LETTER YOD
05B4=POINT HIRIQ
FB2B=LETTER SHIN WITH SIN DOT
05B0=POINT SHEVA
05E8=LETTER RESH
05B8=POINT QAMATS
05D0=LETTER ALEF
05B5=POINT TSERE
05BD=POINT METEG
05DC=LETTER LAMED
05C3=PUNCTUATION SOF PASUQ
0020=SPACE
05C6=PUNCTUATION NUN HAFUKHA
0307=COMBINING DOT ABOVE
0020=SPACE
05E4=LETTER PE
# Compare word 23 of this verse in both versions; the verse has a manual
# split point at word 21 in `splits`, which shifts the later word numbers.
show_case(' JES 19,18', 23)
JES 19,18 word 23 | |||||
---|---|---|---|---|---|
ETCBC3 | ETCBC4 | ||||
monad 219372, node 218383 | monad 218425, node 218424 | ||||
יֵאָמֵ֖ר | יֵאָמֵ֖ר | ||||
J;>@M;73R | J;>@M;73R | ||||
J | 05D9 | LETTER YOD | J | 05D9 | LETTER YOD |
; | 05B5 | POINT TSERE | ; | 05B5 | POINT TSERE |
> | 05D0 | LETTER ALEF | > | 05D0 | LETTER ALEF |
@ | 05B8 | POINT QAMATS | @ | 05B8 | POINT QAMATS |
M | 05DE | LETTER MEM | M | 05DE | LETTER MEM |
; | 05B5 | POINT TSERE | ; | 05B5 | POINT TSERE |
73 | 0596 | ACCENT TIPEHA | 73 | 0596 | ACCENT TIPEHA |
R | 05E8 | LETTER RESH | R | 05E8 | LETTER RESH |
_ | 0020 | SPACE | _ | 0020 | SPACE |
# Show words 6, 7 and 8 of this verse as one concatenated ETCBC4 unit.
show_range(' GEN 01,03', range(6,9))
GEN 01,03 words 6, 7, 8 | ||
---|---|---|
ETCBC4 | ||
monad 36, 37, 38, node 37, 38, 39 | ||
וַֽיְהִי־אֹֽור׃ | ||
WA75-J:HIJ&>O75WR00 | ||
W | 05D5 | LETTER VAV |
A | 05B7 | POINT PATAH |
35 | 05BD | POINT METEG |
J | 05D9 | LETTER YOD |
: | 05B0 | POINT SHEVA |
H | 05D4 | LETTER HE |
I | 05B4 | POINT HIRIQ |
J | 05D9 | LETTER YOD |
& | 05BE | PUNCTUATION MAQAF |
> | 05D0 | LETTER ALEF |
O | 05B9 | POINT HOLAM |
35 | 05BD | POINT METEG |
W | 05D5 | LETTER VAV |
R | 05E8 | LETTER RESH |
00 | 05C3 | PUNCTUATION SOF PASUQ |
# The same three words, now compared one at a time between the versions.
for i in (6, 7, 8):
    show_case(' GEN 01,03', i)
GEN 01,03 word 6 | |||||
---|---|---|---|---|---|
ETCBC3 | ETCBC4 | ||||
monad 37, node 36 | monad 37, node 36 | ||||
וַֽ | וַֽ | ||||
WA75- | WA75- | ||||
W | 05D5 | LETTER VAV | W | 05D5 | LETTER VAV |
A | 05B7 | POINT PATAH | A | 05B7 | POINT PATAH |
35 | 05BD | POINT METEG | 35 | 05BD | POINT METEG |
GEN 01,03 word 7 | |||||
---|---|---|---|---|---|
ETCBC3 | ETCBC4 | ||||
monad 38, node 37 | monad 38, node 37 | ||||
יְהִי־ | יְהִי־ | ||||
J:HIJ& | J:HIJ& | ||||
J | 05D9 | LETTER YOD | J | 05D9 | LETTER YOD |
: | 05B0 | POINT SHEVA | : | 05B0 | POINT SHEVA |
H | 05D4 | LETTER HE | H | 05D4 | LETTER HE |
I | 05B4 | POINT HIRIQ | I | 05B4 | POINT HIRIQ |
J | 05D9 | LETTER YOD | J | 05D9 | LETTER YOD |
& | 05BE | PUNCTUATION MAQAF | & | 05BE | PUNCTUATION MAQAF |
GEN 01,03 word 8 | |||||
---|---|---|---|---|---|
ETCBC3 | ETCBC4 | ||||
monad 39, node 38 | monad 39, node 38 | ||||
אֹֽור׃ | אֹֽור׃ | ||||
>O75WR00 | >O75WR00 | ||||
> | 05D0 | LETTER ALEF | > | 05D0 | LETTER ALEF |
O | 05B9 | POINT HOLAM | O | 05B9 | POINT HOLAM |
35 | 05BD | POINT METEG | 35 | 05BD | POINT METEG |
W | 05D5 | LETTER VAV | W | 05D5 | LETTER VAV |
R | 05E8 | LETTER RESH | R | 05E8 | LETTER RESH |
00 | 05C3 | PUNCTUATION SOF PASUQ | 00 | 05C3 | PUNCTUATION SOF PASUQ |
# Category codes for differing words, in the order in which they are
# reported; None collects everything the tests below do not recognise.
codes = collections.OrderedDict((
    ('q', 'qetiv/qere'),
    ('m', 'merecha -silluq-tifcha-mehuppach-nothing'),
    ('n', 'nun hafukha'),
    ('d', 'dagesh'),
    (None, 'NOT ANALYSED'),
))

# Word positions present in one version only.
missing_in_4 = collections.OrderedDict((e, None) for e in etcbc3 if e not in etcbc4)
missing_in_3 = collections.OrderedDict((e, None) for e in etcbc4 if e not in etcbc3)


def _without(text, marks):
    """Return `text` with every character in `marks` removed."""
    for mark in marks:
        text = text.replace(mark, '')
    return text


# Characters ignored when testing for category 'm'.
_m_marks = ('\u05A5', '\u05BD', '\u0596', '\u05A4', '\u05BE')

# Word positions present in both versions whose decomposed texts differ,
# mapped to the first category whose test succeeds.
different = collections.OrderedDict()
for e in etcbc4:
    if e not in etcbc3:
        continue
    un3 = Transcription._decomp(unicodedata.normalize('NFKD', etcbc3[e][3]))
    un4 = Transcription._decomp(unicodedata.normalize('NFKD', etcbc4[e][3]))
    if un3 == un4:
        continue
    if '\u05AF' in un4:
        code = 'q'
    elif _without(un3, _m_marks) == _without(un4, _m_marks):
        code = 'm'
    elif '\u05C6' in un3 + un4:
        code = 'n'
    elif _without(un3, ('\u05BC',)) == _without(un4, ('\u05BC',)):
        code = 'd'
    else:
        code = None
    different[e] = code

print("Missing in etcbc3: {} words".format(len(missing_in_3)))
print("Missing in etcbc4: {} words".format(len(missing_in_4)))
print("Different in etcbc3 and etcbc4: {} words".format(len(different)))
for code in codes:
    amount = sum(1 for found in different.values() if found == code)
    print("{}x : {}".format(amount, codes[code]))
Missing in etcbc3: 2 words Missing in etcbc4: 1 words Different in etcbc3 and etcbc4: 2020 words 1892x : qetiv/qere 52x : merecha -silluq-tifcha-mehuppach-nothing 9x : nun hafukha 11x : dagesh 56x : NOT ANALYSED
# Trial run: write the comparison rows of the first ten words of one verse
# to test.html.
ohtest = API4['outfile']('test.html')
testcases = [(' EXO 32,17', i) for i in range(1,11)]
try:
    write_cases(testcases, ohtest)
finally:
    # Release the handle even when building a case fails.
    ohtest.close()
print(missing_in_4)
OrderedDict([((' EZE 18,14', 19), None)])
# Write all differing words except the qetiv/qere ones, grouped by
# category in reverse order of `codes`:
#   cases.html - table of contents plus one comparison table per category
#   comp.txt   - per category a '# description' line followed by one
#                tab-separated line (passage, word number, 'x') per case
ohf = API4['outfile']('comp.txt')
ohe = API4['outfile']('cases.html')
try:
    # Collect the cases of every category once; both passes below use them.
    sections = [
        (code, codes[code], [e for e in different if different[e] == code])
        for code in reversed(codes)
        if code != 'q'
    ]

    toc = ["<h1>Table of Contents</h1>\n"]
    for (code, desc, cases) in sections:
        toc.append('<p><a href="#cases.{}"><b>Cases of {}</b></a></p>\n'.format(code, desc))
        for (p, w) in cases:
            toc.append('<a href="#case.{}.{}">{}w{}</a>\n'.format(p, w, p.replace(' ',''), w))

    ohe.write(show_case_init)
    ohe.write(''.join(toc))
    for (code, desc, cases) in sections:
        ohe.write('<h1><a name="cases.{}">Cases of {}</a></h1>\n'.format(code, desc))
        write_cases(cases, ohe)
        ohf.write('# {}\n'.format(desc))
        # Terminate every case line, so that the next '# ...' header starts
        # on a line of its own instead of being glued to the last case.
        ohf.write(''.join('{}\t{}\t{}\n'.format(e[0], e[1], 'x') for e in cases))
    ohe.write(show_case_final)
finally:
    # Release both handles even when building a case fails.
    ohf.close()
    ohe.close()
def show_swap(p, wn, ws):
    """Display one accent-swap case together with its verse in both versions.

    Parameters:
        p: verse label, a key of `etcbc3n` and `etcbc4n`.
        wn: word number within the verse (display only).
        ws: the entry built by `find_swaps`: (monads, node, transliteration,
            its Hebrew rendering, swapped transliteration, its Hebrew
            rendering, stored Hebrew text).
    """
    import html

    F3 = API3['F']
    F4 = API4['F']
    v3n = etcbc3n[p]
    v4n = etcbc4n[p]
    v3text = ''.join('{}{}'.format(F3.text.v(n), F3.suffix.v(n)) for n in v3n)
    v4text = ''.join('{}{}'.format(F4.g_word_utf8.v(n), F4.trailer_utf8.v(n)) for n in v4n)
    d_tpl = '''
<tr>
<td>{} word {} m={} n={}</td>
<td class="gw">{}</td>
<td class="heb"> {}</td>
<td class="gw">{}</td>
<td class="heb"> {}</td>
<td class="heb"> {}</td>
</tr>'''
    v_tpl = '<tr><td>{}</td><td class="heb1" colspan="5"><p class="heb2"> {}</p></td></tr>'

    # Positions 2 and 4 hold transliterations, whose alphabet contains
    # &, < and >; unescaped, a '<' swallows the rest of the row as a tag.
    fields = list(ws)
    for pos in (2, 4):
        if pos < len(fields):
            fields[pos] = html.escape(str(fields[pos]), quote=False)

    # Join instead of a fixed-size format string: with too few placeholders
    # the closing table and page tags were silently dropped.
    t = ''.join((
        show_case_init,
        show_case_table_init,
        d_tpl.format(p, wn, *fields),
        v_tpl.format('ETCBC3', v3text),
        v_tpl.format('ETCBC4', v4text),
        show_case_table_final,
        show_case_final,
    ))
    display(HTML(t))
# Module-level shortcuts into the ETCBC4 API; find_swaps reads NN and F
# from here.
NN = API4['NN']
F = API4['F']
msg = API4['msg']
def find_swaps():
    """Collect the ETCBC4 words whose transliteration changes under accent swapping.

    Walks the nodes from the module-level ``NN`` and numbers the 'word'
    nodes within each verse.  A word is recorded when applying
    ``Transcription.swap_accent_pat`` / ``Transcription._swap_accent`` to
    its ``g_word`` value yields a different string.  The number of recorded
    words is printed.

    Returns:
        Ordered dict mapping ``(verse label, word number)`` to ``(monads,
        node, transliteration, its Hebrew rendering, swapped
        transliteration, its Hebrew rendering, stored g_word_utf8)``.
    """
    found = collections.OrderedDict()
    verse_label = None
    word_no = 0
    for node in NN():
        kind = F.otype.v(node)
        if kind == 'verse':
            # A new verse restarts the word numbering.
            verse_label = F.label.v(node)
            word_no = 0
        elif kind == 'word':
            word_no += 1
            plain = F.g_word.v(node)
            swapped = Transcription.swap_accent_pat.sub(Transcription._swap_accent, plain)
            if swapped == plain:
                continue
            found[(verse_label, word_no)] = (
                F.monads.v(node),
                node,
                plain,
                Transcription.to_hebrew_x(plain),
                swapped,
                Transcription.to_hebrew_x(swapped),
                F.g_word_utf8.v(node),
            )
    print(len(found))
    return found
# All ETCBC4 words affected by accent swapping, keyed by (verse label,
# word number); the count is printed by find_swaps itself.
swaps = find_swaps()
7916
# Display the first `limit` swap cases (all of them when limit is None) and
# log each displayed case to swaps.txt as '=<monads>' and
# '#<transliteration>' lines followed by a blank line.
oh = API4['outfile']('swaps.txt')
i = 0
limit = 10
try:
    for (p, wn) in swaps:
        i += 1
        if limit is not None and i > limit:
            break
        info = swaps[(p, wn)]
        show_swap(p, wn, info)
        oh.write("={}\n#{}\n\n".format(info[0], info[2]))
finally:
    # Release the handle even when displaying a case fails.
    oh.close()
GEN 01,11 word 8 m=186 n=185 | 10<;FEB | ֚עֵשֶׂב | <;10FEB | עֵ֚שֶׂב | עֵ֚שֶׂב |
ETCBC3 | וַיֹּ֣אמֶר אֱלֹהִ֗ים תַּֽדְשֵׁ֤א הָאָ֨רֶץ֙ דֶּ֔שֶׁא עֵ֚שֶׂב מַזְרִ֣יעַ זֶ֔רַע עֵ֣ץ פְּרִ֞י עֹ֤שֶׂה פְּרִי֙ לְמִינֹ֔ו אֲשֶׁ֥ר זַרְעֹו־בֹ֖ו עַל־הָאָ֑רֶץ וַֽיְהִי־כֵֽן׃ | ||||
ETCBC4 | וַיֹּ֣אמֶר אֱלֹהִ֗ים תַּֽדְשֵׁ֤א הָאָ֨רֶץ֙ דֶּ֔שֶׁא עֵ֚שֶׂב מַזְרִ֣יעַ זֶ֔רַע עֵ֣ץ פְּרִ֞י עֹ֤שֶׂה פְּרִי֙ לְמִינֹ֔ו אֲשֶׁ֥ר זַרְעֹו־בֹ֖ו עַל־הָאָ֑רֶץ וַֽיְהִי־כֵֽן׃ |
GEN 01,12 word 5 m=208 n=207 | 14D.ECE> | ֠דֶּשֶׁא | D.E14CE> | דֶּ֠שֶׁא | דֶּ֠שֶׁא |
ETCBC3 | וַתֹּוצֵ֨א הָאָ֜רֶץ דֶּ֠שֶׁא עֵ֣שֶׂב מַזְרִ֤יעַ זֶ֨רַע֙ לְמִינֵ֔הוּ וְעֵ֧ץ עֹֽשֶׂה־פְּרִ֛י אֲשֶׁ֥ר זַרְעֹו־בֹ֖ו לְמִינֵ֑הוּ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֹֽוב׃ | ||||
ETCBC4 | וַתֹּוצֵ֨א הָאָ֜רֶץ דֶּ֠שֶׁא עֵ֣שֶׂב מַזְרִ֤יעַ זֶ֨רַע֙ לְמִינֵ֔הוּ וְעֵ֧ץ עֹ֥שֶׂה פְּרִ֛י אֲשֶׁ֥ר זַרְעֹו־בֹ֖ו לְמִינֵ֑הוּ וַיַּ֥רְא אֱלֹהִ֖ים כִּי־טֹֽוב׃ |
GEN 01,30 word 5 m=626 n=625 | 14H@- | ֠הָ- | H@14- | הָ֠- | הָ֠ |
ETCBC3 | וּֽלְכָל־חַיַּ֣ת הָ֠אָרֶץ וּלְכָל־עֹ֨וף הַשָּׁמַ֜יִם וּלְכֹ֣ל ׀ רֹומֵ֣שׂ עַל־הָאָ֗רֶץ אֲשֶׁר־בֹּו֙ נֶ֣פֶשׁ חַיָּ֔ה אֶת־כָּל־יֶ֥רֶק עֵ֖שֶׂב לְאָכְלָ֑ה וַֽיְהִי־כֵֽן׃ | ||||
ETCBC4 | וּֽלְכָל־חַיַּ֣ת הָ֠אָרֶץ וּלְכָל־עֹ֨וף הַשָּׁמַ֜יִם וּלְכֹ֣ל ׀ רֹומֵ֣שׂ עַל־הָאָ֗רֶץ אֲשֶׁר־בֹּו֙ נֶ֣פֶשׁ חַיָּ֔ה אֶת־כָּל־יֶ֥רֶק עֵ֖שֶׂב לְאָכְלָ֑ה וַֽיְהִי־כֵֽן׃ |
GEN 02,05 word 6 m=750 n=749 | 10VEREM | ֚טֶרֶמ | VE10REM | טֶ֚רֶמ | טֶ֚רֶם |
ETCBC3 | וְכֹ֣ל ׀ שִׂ֣יחַ הַשָּׂדֶ֗ה טֶ֚רֶם יִֽהְיֶ֣ה בָאָ֔רֶץ וְכָל־עֵ֥שֶׂב הַשָּׂדֶ֖ה טֶ֣רֶם יִצְמָ֑ח כִּי֩ לֹ֨א הִמְטִ֜יר יְהוָ֤ה אֱלֹהִים֙ עַל־הָאָ֔רֶץ וְאָדָ֣ם אַ֔יִן לַֽעֲבֹ֖ד אֶת־הָֽאֲדָמָֽה׃ | ||||
ETCBC4 | וְכֹ֣ל ׀ שִׂ֣יחַ הַשָּׂדֶ֗ה טֶ֚רֶם יִֽהְיֶ֣ה בָאָ֔רֶץ וְכָל־עֵ֥שֶׂב הַשָּׂדֶ֖ה טֶ֣רֶם יִצְמָ֑ח כִּי֩ לֹ֨א הִמְטִ֜יר יְהוָ֤ה אֱלֹהִים֙ עַל־הָאָ֔רֶץ וְאָדָ֣ם אַ֔יִן לַֽעֲבֹ֖ד אֶת־הָֽאֲדָמָֽה׃ |
GEN 02,11 word 8 m=889 n=888 | 10>;T | ֚אֵת | >;10T | אֵ֚ת | אֵ֚ת |
ETCBC3 | שֵׁ֥ם הָֽאֶחָ֖ד פִּישֹׁ֑ון ה֣וּא הַסֹּבֵ֗ב אֵ֚ת כָּל־אֶ֣רֶץ הַֽחֲוִילָ֔ה אֲשֶׁר־שָׁ֖ם הַזָּהָֽב׃ | ||||
ETCBC4 | שֵׁ֥ם הָֽאֶחָ֖ד פִּישֹׁ֑ון ה֣וּא הַסֹּבֵ֗ב אֵ֚ת כָּל־אֶ֣רֶץ הַֽחֲוִילָ֔ה אֲשֶׁר־שָׁ֖ם הַזָּהָֽב׃ |
GEN 02,23 word 8 m=1123 n=1122 | 10 ֚עֶצֶמ |
עֶ֚צֶמ |
עֶ֚צֶם |
| |
ETCBC3 | וַיֹּאמֶר֮ הָֽאָדָם֒ זֹ֣את הַפַּ֗עַם עֶ֚צֶם מֵֽעֲצָמַ֔י וּבָשָׂ֖ר מִבְּשָׂרִ֑י לְזֹאת֙ יִקָּרֵ֣א אִשָּׁ֔ה כִּ֥י מֵאִ֖ישׁ לֻֽקֳחָה־זֹּֽאת׃ | ||||
ETCBC4 | וַיֹּאמֶר֮ הָֽאָדָם֒ זֹ֣את הַפַּ֗עַם עֶ֚צֶם מֵֽעֲצָמַ֔י וּבָשָׂ֖ר מִבְּשָׂרִ֑י לְזֹאת֙ יִקָּרֵ֣א אִשָּׁ֔ה כִּ֥י מֵאִ֖ישׁ לֻֽקֳחָה־זֹּֽאת׃ |
GEN 03,01 word 20 m=1187 n=1186 | 10>AP | ֚אַפ | >A10P | אַ֚פ | אַ֚ף |
ETCBC3 | וְהַנָּחָשׁ֙ הָיָ֣ה עָר֔וּם מִכֹּל֙ חַיַּ֣ת הַשָּׂדֶ֔ה אֲשֶׁ֥ר עָשָׂ֖ה יְהוָ֣ה אֱלֹהִ֑ים וַיֹּ֨אמֶר֙ אֶל־הָ֣אִשָּׁ֔ה אַ֚ף כִּֽי־אָמַ֣ר אֱלֹהִ֔ים לֹ֣א תֹֽאכְל֔וּ מִכֹּ֖ל עֵ֥ץ הַגָּֽן׃ | ||||
ETCBC4 | וְהַנָּחָשׁ֙ הָיָ֣ה עָר֔וּם מִכֹּל֙ חַיַּ֣ת הַשָּׂדֶ֔ה אֲשֶׁ֥ר עָשָׂ֖ה יְהוָ֣ה אֱלֹהִ֑ים וַיֹּ֨אמֶר֙ אֶל־הָ֣אִשָּׁ֔ה אַ֚ף כִּֽי־אָמַ֣ר אֱלֹהִ֔ים לֹ֣א תֹֽאכְל֔וּ מִכֹּ֖ל עֵ֥ץ הַגָּֽן׃ |
GEN 03,05 word 1 m=1242 n=1241 | 10K.IJ | ֚כִּי | K.I10J | כִּ֚י | כִּ֚י |
ETCBC3 | כִּ֚י יֹדֵ֣עַ אֱלֹהִ֔ים כִּ֗י בְּיֹום֙ אֲכָלְכֶ֣ם מִמֶּ֔נּוּ וְנִפְקְח֖וּ עֵֽינֵיכֶ֑ם וִהְיִיתֶם֙ כֵּֽאלֹהִ֔ים יֹדְעֵ֖י טֹ֥וב וָרָֽע׃ | ||||
ETCBC4 | כִּ֚י יֹדֵ֣עַ אֱלֹהִ֔ים כִּ֗י בְּיֹום֙ אֲכָלְכֶ֣ם מִמֶּ֔נּוּ וְנִפְקְח֖וּ עֵֽינֵיכֶ֑ם וִהְיִיתֶם֙ כֵּֽאלֹהִ֔ים יֹדְעֵ֖י טֹ֥וב וָרָֽע׃ |
GEN 03,11 word 3 m=1372 n=1371 | 10MIJ | ֚מִי | MI10J | מִ֚י | מִ֚י |
ETCBC3 | וַיֹּ֕אמֶר מִ֚י הִגִּ֣יד לְךָ֔ כִּ֥י עֵירֹ֖ם אָ֑תָּה הֲמִן־הָעֵ֗ץ אֲשֶׁ֧ר צִוִּיתִ֛יךָ לְבִלְתִּ֥י אֲכָל־מִמֶּ֖נּוּ אָכָֽלְתָּ׃ | ||||
ETCBC4 | וַיֹּ֕אמֶר מִ֚י הִגִּ֣יד לְךָ֔ כִּ֥י עֵירֹ֖ם אָ֑תָּה הֲמִן־הָעֵ֗ץ אֲשֶׁ֧ר צִוִּיתִ֛יךָ לְבִלְתִּ֥י אֲכָל־מִמֶּ֖נּוּ אָכָֽלְתָּ׃ |
GEN 03,15 word 15 m=1470 n=1469 | 10HW.> | ֚הוּא | H10W.> | ה֚וּא | ה֚וּא |
ETCBC3 | וְאֵיבָ֣ה ׀ אָשִׁ֗ית בֵּֽינְךָ֙ וּבֵ֣ין הָֽאִשָּׁ֔ה וּבֵ֥ין זַרְעֲךָ֖ וּבֵ֣ין זַרְעָ֑הּ ה֚וּא יְשׁוּפְךָ֣ רֹ֔אשׁ וְאַתָּ֖ה תְּשׁוּפֶ֥נּוּ עָקֵֽב׃ ס | ||||
ETCBC4 | וְאֵיבָ֣ה ׀ אָשִׁ֗ית בֵּֽינְךָ֙ וּבֵ֣ין הָֽאִשָּׁ֔ה וּבֵ֥ין זַרְעֲךָ֖ וּבֵ֣ין זַרְעָ֑הּ ה֚וּא יְשׁוּפְךָ֣ רֹ֔אשׁ וְאַתָּ֖ה תְּשׁוּפֶ֥נּוּ עָקֵֽב׃ ס |