accented+vocalized, vocalized, consonantal
import sys,os,re
import collections
from IPython.display import HTML, display_pretty, display_html
import laf
from laf.fabric import LafFabric
from etcbc.lib import Transcription
from etcbc.preprocess import prep
# Name prefix of the data source and the versions of it that are handled
# side by side; a data set is addressed as source + version.
source = 'etcbc'
versions = ('4b', '4c')

# Processor object through which the data sets are loaded.
fabric = LafFabric()
0.00s This is LAF-Fabric 4.8.3 API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html
# Per-version handles taken from the API dictionary returned by fabric.load:
# FF holds API['F'], MSG holds API['msg'], LL holds API['L'].
FF = {}
MSG = {}
LL = {}
# Iterate over `versions` (not a second hard-coded tuple) so that the set of
# loaded versions always matches the set that the later cells loop over.
for version in versions:
    API = fabric.load(source+version, '--', 'passage', {
        "xmlids": {"node": False, "edge": False},
        "features": ('''
otype
g_cons g_word g_cons_utf8 g_word_utf8 g_word trailer_utf8
book chapter verse label
''',''),
        "prepare": prep(select={'L'}),
        "primary": False,
    }, verbose='NORMAL')
    FF[version] = API['F']
    MSG[version] = API['msg']
    LL[version] = API['L']
0.00s LOADING API: please wait ... 0.00s USING main: etcbc4b DATA COMPILED AT: 2015-11-02T15-08-56 2.28s LOGFILE=/Users/dirk/laf/laf-fabric-output/etcbc4b/passage/__log__passage.txt 2.29s INFO: LOADING PREPARED data: please wait ... 2.29s prep prep: G.node_sort 2.34s prep prep: G.node_sort_inv 2.80s prep prep: L.node_up 5.37s prep prep: L.node_down 10s prep prep: V.verses 10s prep prep: V.books_la 10s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html 11s INFO: LOADED PREPARED data 11s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX FOR TASK passage AT 2016-11-09T19-10-06 0.00s LOADING API: please wait ... 0.00s BEGIN COMPILE m: etcbc4c 0.00s LOGFILE=/Users/dirk/laf/laf-fabric-data/etcbc4c/bin/__log__compile__.txt 0.00s PARSING ANNOTATION FILES 0.08s INFO: parsing etcbc4c_regions.xml 5.37s INFO: parsing etcbc4c_monads.xml 28s INFO: parsing etcbc4c_lingo.xml 1m 22s INFO: parsing etcbc4c_sections.xml 1m 28s INFO: parsing etcbc4c_monads.lex.xml 3m 55s INFO: parsing etcbc4c_lingo.c.xml 4m 15s INFO: parsing etcbc4c_lingo.p.xml 4m 34s INFO: parsing etcbc4c_lingo.pa.xml 4m 59s INFO: parsing etcbc4c_lingo.s.xml 5m 02s INFO: parsing etcbc4c_lingo.sp.xml 5m 06s INFO: END PARSING 800724 good regions and 0 faulty ones 1436894 linked nodes and 0 unlinked ones 2225333 good edges and 0 faulty ones 5029799 good annots and 0 faulty ones 34171309 good features and 0 faulty ones 9492750 distinct xml identifiers 5m 06s MODELING RESULT FILES 5m 06s INFO: XML-IDS (inverse mapping) 5m 09s INFO: NODES AND REGIONS 5m 09s INFO: NODES ANCHOR BOUNDARIES 5m 29s INFO: NODES SORTING BY REGIONS 5m 31s INFO: NODES EVENTS 5m 53s INFO: CONNECTIVITY 5m 56s WRITING RESULT FILES for m: etcbc4c 6m 30s END COMPILE m: etcbc4c 6m 30s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37 6m 34s INFO: LOADING PREPARED data: please wait ... 6m 34s prep prep: G.node_sort 6m 34s PREPARING prep: G.node_sort 6m 34s LOADING API with EXTRAs: please wait ... 
6m 34s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37 6m 36s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-16-42 6m 36s SORTING nodes ... 7m 21s WRITING prep: G.node_sort 7m 21s prep prep: G.node_sort_inv 7m 21s PREPARING prep: G.node_sort_inv 7m 21s SORTING nodes (inv) ... 7m 22s WRITING prep: G.node_sort_inv 7m 22s prep prep: L.node_up 7m 22s PREPARING prep: L.node_up 7m 22s LOADING API with EXTRAs: please wait ... 7m 22s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37 7m 22s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-17-28 7m 22s Objects contained in books 7m 33s Objects contained in chapters 7m 42s Objects contained in verses 7m 52s Objects contained in half_verses 8m 01s Objects contained in sentences 8m 10s Objects contained in sentence_atoms 8m 19s Objects contained in clauses 8m 27s Objects contained in clause_atoms 8m 34s Objects contained in phrases 8m 41s Objects contained in phrase_atoms 8m 46s Objects contained in subphrases 8m 49s Objects contained in words 8m 52s WRITING prep: L.node_up 8m 55s prep prep: L.node_down 8m 55s PREPARING prep: L.node_down 8m 55s WRITING prep: L.node_down 8m 59s prep prep: V.verses 8m 59s PREPARING prep: V.verses 8m 59s LOADING API with EXTRAs: please wait ... 8m 59s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37 8m 59s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-19-05 8m 59s Making verse index 9m 00s Done. 23213 verses 9m 00s WRITING prep: V.verses 9m 00s prep prep: V.books_la 9m 00s PREPARING prep: V.books_la 9m 00s Listing books 9m 02s Done. 39 books 9m 02s WRITING prep: V.books_la 9m 02s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html 9m 02s INFO: LOADED PREPARED data 9m 02s INFO: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-19-08
# verses[version][book][chapter][verse] -> verse node,
# with chapter and verse stored as ints.
verses = {}
for version in versions:
    msg, F = MSG[version], FF[version]
    msg("{}: Making a mapping between a passage specification and a verse node".format(version))
    # Two auto-creating levels (book, chapter) on top of a plain dict (verse).
    versesv = collections.defaultdict(lambda: collections.defaultdict(dict))
    for vn in F.otype.s('verse'):
        bk, ch, vs = F.book.v(vn), int(F.chapter.v(vn)), int(F.verse.v(vn))
        versesv[bk][ch][vs] = vn
    verses[version] = versesv
    msg('Done')
6.65s 4b: Making a mapping between a passage specification and a verse node 8.34s Done 8.34s 4c: Making a mapping between a passage specification and a verse node 9.62s Done
# Style sheet for the tables produced by print_verse:
#   td.ht - right-to-left cell for Hebrew text,
#   td.et - left-to-right cell for transcription,
#   td.vl - small grey label cell.
HTML('''
<style type="text/css">
td.ht {
font-family: Ezra SIL, SBL Hebrew, Verdana, sans-serif;
font-size: x-large;
line-height: 1.7;
text-align: right;
direction: rtl;
}
td.et {
font-family: Verdana, sans-serif;
font-size: medium;
line-height: 1.2;
text-align: left;
direction: ltr;
}
td.vl {
font-family: Verdana, Arial, sans-serif;
font-size: small;
text-align: right;
vertical-align: top;
color: #aaaaaa;
width: 5%;
direction: ltr;
}
</style>
''')
# Characters removed from a transcribed word when accents are not wanted:
# the asterisk and all digits.
accent_pat = re.compile('[*0-9]')
tr = Transcription()


def print_verse(bk, ch, vs, vowels=True, accents=True):
    """Build an HTML table showing one verse in every version in ``versions``.

    For each version two rows are produced: the Hebrew text (cell class
    ``ht``) labelled with the version, and the transcription (cell class
    ``et``) labelled with the passage.

    Args:
        bk: book name, used as first key into ``verses[version]``.
        ch: chapter number (int), second key.
        vs: verse number (int), third key.
        vowels: when false, the ``g_cons_utf8`` / ``g_cons`` features are
            shown instead of ``g_word_utf8`` / ``g_word``.
        accents: when false (and ``vowels`` is true), the Hebrew text is
            converted to transcription, everything matching ``accent_pat``
            is removed, and the result is converted back to Hebrew. The
            transcription row itself is not stripped.

    Returns:
        The HTML of a ``<table>`` element as a string.

    Raises:
        KeyError: if the passage is not present in ``verses`` for a version.
    """
    # Local import keeps this block self-contained.
    import html

    # The passage label is the same for every version.
    label = '{} {}:{}'.format(bk, ch, vs)
    rows = {}
    for version in versions:
        F = FF[version]
        L = LL[version]
        vn = verses[version][bk][ch][vs]
        treps = []
        trepes = []
        for w in L.d('word', vn):
            if not vowels:
                trep = '{}{}'.format(F.g_cons_utf8.v(w), F.trailer_utf8.v(w))
                trepe = F.g_cons.v(w)
            else:
                trep = '{}{}'.format(F.g_word_utf8.v(w), F.trailer_utf8.v(w))
                trepe = F.g_word.v(w)
                if not accents:
                    trep = Transcription.to_hebrew(accent_pat.sub('', tr.from_hebrew(trep)))
            treps.append(trep)
            trepes.append(trepe)
        # The transcription contains literal '<' and '>' characters; without
        # escaping, a '<' followed by a letter is parsed as the start of a
        # tag and the rest of the verse is not displayed.
        text = html.escape(''.join(treps), quote=False)
        texte = html.escape(' '.join(trepes), quote=False)
        rows[version] = '''
<tr><td class="vl">{}</td><td class="ht">{}</td></tr>
<tr><td class="vl">{}</td><td class="et">{}</td></tr>
'''.format(version, text, html.escape(label, quote=False), texte)
    return '''
<table>
{}
</table>'''.format('\n'.join(rows[version] for version in versions))
def pc(bk, ch, vs):
    """Render a verse with no vowels and no accents."""
    return print_verse(bk, ch, vs, vowels=False, accents=False)


def pv(bk, ch, vs):
    """Render a verse with vowels but no accents."""
    return print_verse(bk, ch, vs, vowels=True, accents=False)


def pa(bk, ch, vs):
    """Render a verse with vowels and accents."""
    return print_verse(bk, ch, vs, vowels=True, accents=True)
# Display Esther 3:4 for all versions: no vowels, no accents.
HTML(pc('Esther', 3, 4))
4b | ויהי באמרם אליו יום ויום ולא שׁמע אליהם ויגידו להמן לראות היעמדו דברי מרדכי כי־הגיד להם אשׁר־הוא יהודי׃ |
Esther 3:4 | W JHJ B >MRM >LJW JWM W JWM W L> CM< >LJHM W JGJDW L HMN L R>WT H J |
4c | ויהי באמרם אליו יום ויום ולא שׁמע אליהם ויגידו להמן לראות היעמדו דברי מרדכי כי־הגיד להם אשׁר־הוא יהודי׃ |
Esther 3:4 | W JHJ B >MRM >LJW JWM W JWM W L> CM< >LJHM W JGJDW L HMN L R>WT H J |
# Display Esther 3:4 for all versions: vowels, no accents.
HTML(pv('Esther', 3, 4))
4b | וַיְהִי באמרם אֵלָיו יֹום וָיֹום וְלֹא שָׁמַע אֲלֵיהֶם וַיַּגִּידוּ לְהָמָן לִרְאֹות הֲיַעַמְדוּ דִּבְרֵי מָרְדֳּכַי כִּי־הִגִּיד לָהֶם אֲשֶׁר־הוּא יְהוּדִי |
Esther 3:4 | WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA75 |
4c | וַיְהִי באמרם אֵלָיו יֹום וָיֹום וְלֹא שָׁמַע אֲלֵיהֶם וַיַּגִּידוּ לְהָמָן לִרְאֹות הֲיַעַמְדוּ דִּבְרֵי מָרְדֳּכַי כִּי־הִגִּיד לָהֶם אֲשֶׁר־הוּא יְהוּדִי |
Esther 3:4 | WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA45 |
# Display Esther 3:4 for all versions: vowels and accents.
HTML(pa('Esther', 3, 4))
4b | וַיְהִ֗י ֯ב֯אמרם אֵלָיו֙ יֹ֣ום וָיֹ֔ום וְלֹ֥א שָׁמַ֖ע אֲלֵיהֶ֑ם וַיַּגִּ֣ידוּ לְהָמָ֗ן לִרְאֹות֙ הֲיַֽעַמְדוּ֙ דִּבְרֵ֣י מָרְדֳּכַ֔י כִּֽי־הִגִּ֥יד לָהֶ֖ם אֲשֶׁר־ה֥וּא יְהוּדִֽי׃ |
Esther 3:4 | WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA75 |
4c | וַיְהִ֗י באמרם אֵלָיו֙ יֹ֣ום וָיֹ֔ום וְלֹ֥א שָׁמַ֖ע אֲלֵיהֶ֑ם וַיַּגִּ֣ידוּ לְהָמָ֗ן לִרְאֹות֙ הֲיַֽעַמְדוּ֙ דִּבְרֵ֣י מָרְדֳּכַ֔י כִּֽי־הִגִּ֥יד לָהֶ֖ם אֲשֶׁר־ה֥וּא יְהוּדִֽי׃ |
Esther 3:4 | WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA45 |