Give an overview of all clause_atoms and their indentation
import sys
import collections
from laf.fabric import LafFabric
from etcbc.lib import Transcription
# Create the LafFabric object through which the data is loaded below.
fabric = LafFabric()
0.00s This is LAF-Fabric 4.4.6 API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html Feature doc: http://shebanq-doc.readthedocs.org/en/latest/texts/welcome.html
# Load source 'etcbc4' with annox 'lexicon' for the task named 'indentation'.
API = fabric.load('etcbc4', 'lexicon', 'indentation', {
# XML identifiers are switched off for both nodes and edges.
"xmlids": {"node": False, "edge": False},
# Requested features, given as a pair of strings. The first names the node
# features otype, monads, book, chapter, verse, g_word and tab; the second is
# empty (presumably the edge features -- confirm in the LAF-Fabric API reference).
"features": ('''
otype monads
book chapter verse
g_word
tab
''',''),
# NOTE(review): presumably means the primary text data is not loaded -- confirm.
"primary": False,
}, verbose='DETAIL')
# Execute the code string supplied by fabric.localnames. Presumably this binds
# the API members used further down (F, NN, msg, outfile) as plain names --
# confirm in the LAF-Fabric API reference.
exec(fabric.localnames.format(var='fabric'))
0.00s LOADING API: please wait ... 0.00s DETAIL: COMPILING m: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2014-07-23T09-31-37 0.00s DETAIL: COMPILING a: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2014-10-14T11-06-47 0.01s DETAIL: load main: G.node_anchor_min 0.08s DETAIL: load main: G.node_anchor_max 0.15s DETAIL: load main: G.node_sort 0.20s DETAIL: load main: G.node_sort_inv 0.65s DETAIL: load main: G.edges_from 0.72s DETAIL: load main: G.edges_to 0.80s DETAIL: load main: F.etcbc4_db_monads [node] 1.60s DETAIL: load main: F.etcbc4_db_otype [node] 2.37s DETAIL: load main: F.etcbc4_ft_g_word [node] 2.61s DETAIL: load main: F.etcbc4_ft_tab [node] 2.64s DETAIL: load main: F.etcbc4_sft_book [node] 2.66s DETAIL: load main: F.etcbc4_sft_chapter [node] 2.67s DETAIL: load main: F.etcbc4_sft_verse [node] 2.68s DETAIL: load annox: F.etcbc4_db_monads [node] 2.68s DETAIL: load annox: F.etcbc4_db_otype [node] 2.68s DETAIL: load annox: F.etcbc4_ft_g_word [node] 2.68s DETAIL: load annox: F.etcbc4_ft_tab [node] 2.69s DETAIL: load annox: F.etcbc4_sft_book [node] 2.69s DETAIL: load annox: F.etcbc4_sft_chapter [node] 2.69s DETAIL: load annox: F.etcbc4_sft_verse [node] 2.69s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4/indentation/__log__indentation.txt 2.69s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX lexicon FOR TASK indentation AT 2014-10-23T08-59-42
When we have found our objects, we want to indicate where they occur in the Bible. In order to specify the passage of a node, we have to know in what verse a node occurs. In the next code cell we create a mapping from nodes of type sentence, clause, etc., to nodes of type verse. From a verse node we can read off the passage information.
Conversely, we also construct an index from verses to nodes: given a verse, we make a list of all nodes belonging to that verse, in the canonical order.
# Object types that are indexed per verse. Only clause atoms are enabled;
# other types that could be added to this set are: sentence, sentence_atom,
# clause, phrase, phrase_atom, subphrase and word.
target_types = {
    'clause_atom',
}

# monad number -> set of target nodes that contain that monad
object_index = collections.defaultdict(set)
# verse node -> frozenset of the monads in that verse
verse_monads = {}
# verse node -> tuple of the target nodes occurring in that verse
verse_node = {}
# target node -> set of the monads it contains
target_words = collections.defaultdict(set)
# monad number -> word node occupying that monad
monad_node = {}
def get_set(monads):
    """Parse a monad specification string into a frozenset of integers.

    The specification is a comma-separated list of items, each of which is
    either a single number (``"7"``) or an inclusive range (``"3-5"``).

    Args:
        monads: the specification string. ``None`` or an empty string
            yields an empty set.

    Returns:
        A frozenset containing every monad named by the specification.

    Raises:
        ValueError: if a non-empty item is not a number or a number range.
    """
    if not monads:
        return frozenset()
    monad_set = set()
    for item in monads.split(','):
        if not item.strip():
            # Tolerate empty items caused by stray or trailing commas.
            continue
        first, dash, last = item.partition('-')
        if dash:
            # Ranges are inclusive on both ends.
            monad_set.update(range(int(first), int(last) + 1))
        else:
            monad_set.add(int(first))
    return frozenset(monad_set)
def ranges(monadset):
    """Collapse a collection of monads into maximal runs of consecutive numbers.

    Args:
        monadset: any iterable of integers; order and duplicates are ignored.

    Returns:
        A list of ``(start, end)`` tuples with inclusive bounds, in ascending
        order. An empty input gives an empty list.
    """
    result = []
    cur_start = None
    cur_end = None
    # Deduplicate first: a repeated value would otherwise close the current
    # run and open an overlapping one.
    for i in sorted(set(monadset)):
        if cur_start is None:
            cur_start = cur_end = i
        elif i == cur_end + 1:
            cur_end = i
        else:
            result.append((cur_start, cur_end))
            cur_start = cur_end = i
    if cur_start is not None:
        # Flush the run that was still open when the input ended.
        result.append((cur_start, cur_end))
    return result
# Pass 1: remember for every monad which word node occupies it.
msg("Making a mapping between monads and nodes ... ")
for node in NN():
    if F.otype.v(node) != 'word':
        continue
    monad_node[int(F.monads.v(node))] = node
msg("Done")

# Pass 2: record the monads of every verse, and for every target object
# register it under each of its monads (and the monads under the object).
msg("Making verse index ...")
for node in NN():
    node_type = F.otype.v(node)
    if node_type == 'verse':
        verse_monads[node] = get_set(F.monads.v(node))
        continue
    if node_type not in target_types:
        continue
    for monad in get_set(F.monads.v(node)):
        object_index[monad].add(node)
        target_words[node].add(monad)

# Per verse: gather every target object touching one of the verse's monads,
# then let NN(nodes=...) put them in its node order.
for verse, monads_in_verse in verse_monads.items():
    touching = set().union(*(object_index[monad] for monad in monads_in_verse))
    verse_node[verse] = tuple(NN(nodes=touching))
msg("Verse index created for {} verses".format(len(verse_node)))
6m 04s Making a mapping between monads and nodes ... 6m 06s Done 6m 06s Making verse index ... 6m 11s Verse index created for 23213 verses
# Sanity check: show how many verses ended up in the index.
print(len(verse_node))
23213
We collect clause atoms, their verse indicator, their indentation level, their text.
# Write one row per clause atom to indentation.csv, with fields separated by
# semicolons: the quoted verse label, the value of the tab feature, and the
# quoted text (the g_word values of the atom's words joined by spaces).
outf = outfile('indentation.csv')
try:
    for n in NN():
        otype = F.otype.v(n)
        if otype == 'book':
            # Progress indicator: report each book node as it is reached.
            msg(F.book.v(n))
        elif otype == 'verse':
            vlabel = "{} {}:{}".format(F.book.v(n), F.chapter.v(n), F.verse.v(n))
            for ca in verse_node[n]:
                # Words are emitted in ascending monad order.
                text = ' '.join(
                    F.g_word.v(monad_node[w]) for w in sorted(target_words[ca])
                )
                outf.write('''"{}";{};"{}"\n'''.format(vlabel, F.tab.v(ca), text))
finally:
    # Close the handle even when a lookup above raises, so the rows already
    # written are flushed and the file is not left open.
    outf.close()
15m 06s Genesis 15m 06s Exodus 15m 07s Leviticus 15m 07s Numeri 15m 07s Deuteronomium 15m 07s Josua 15m 07s Judices 15m 07s Samuel_I 15m 07s Samuel_II 15m 07s Reges_I 15m 08s Reges_II 15m 08s Jesaia 15m 08s Jeremia 15m 08s Ezechiel 15m 08s Hosea 15m 08s Joel 15m 08s Amos 15m 08s Obadia 15m 08s Jona 15m 08s Micha 15m 08s Nahum 15m 08s Habakuk 15m 08s Zephania 15m 08s Haggai 15m 08s Sacharia 15m 08s Maleachi 15m 08s Psalmi 15m 09s Iob 15m 09s Proverbia 15m 09s Ruth 15m 09s Canticum 15m 09s Ecclesiastes 15m 09s Threni 15m 09s Esther 15m 09s Daniel 15m 09s Esra 15m 09s Nehemia 15m 09s Chronica_I 15m 09s Chronica_II