This is a general-purpose tool to get acquainted with the features in the LAF version of the Hebrew Database.
import sys
import collections
import pandas
import matplotlib.pyplot as plt
from IPython.display import display
pandas.set_option('display.notebook_repr_html', True)
%matplotlib inline
from laf.fabric import LafFabric
import etcbc
from etcbc.featuredoc import FeatureDoc
# Create the LAF-Fabric processor object; the cells below call fabric.load()
# on it and pass it to FeatureDoc.
fabric = LafFabric()
0.00s This is LAF-Fabric 4.5.4 API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html
Specify the features you want to study. This notebook only supports studying the features in the ft namespace.
Do this by providing a list of features to the study specification below.
The requested features will be loaded, on top of the base supply of features that we need for showing examples.
# ETCBC3 configuration. The load log reports the three positional arguments
# as SOURCE ('etcbc3'), ANNOX ('--') and TASK ('feature-doc').
API = fabric.load(
    'etcbc3',
    '--',
    'feature-doc',
    {
        "xmlids": {"node": False, "edge": False},
        # No features are requested in this call; the features to study are
        # listed in the FeatureDoc specification that follows.
        "features": ("", ""),
        "primary": False,
    },
)

# Study specification: which features to document and how to report them.
doc = FeatureDoc(
    fabric,
    {
        'features': {
            # Node feature names, one per line. The string content is kept
            # verbatim, so the names stay at column 0.
            'node': '''
gender
number
person
suffix_gender
suffix_number
suffix_person
locative
tense
noun_type
pronoun_type
lexical_set
part_of_speech
phrase_dependent_part_of_speech
phrase_type
phrase_function
clause_constituent_relation
clause_atom_relation
clause_atom_type
domain
text_type
embedding_domain
''',
            # Edge feature names, same layout.
            'edge': '''
mother
parents
''',
        },
        # Values that are reported as "absence values" instead of defined values.
        'absence_values': {
            'none',
            'unknown',
            'Unkn',
        },
        # NOTE(review): presumably the maximum number of distinct values
        # listed per feature -- confirm against FeatureDoc.
        'VALUE_THRESHOLD': 50,
        # NOTE(review): presumably the feature that supplies verse labels
        # for examples -- confirm against FeatureDoc.
        'vlabel': 'verse_label',
    },
)

# Execute the code template held in fabric.localnames (formatted with
# var='fabric') in the notebook's global namespace.
# NOTE(review): later cells use my_file(), which is not defined in any visible
# cell; presumably this exec provides it -- confirm in the LAF-Fabric docs.
exec(fabric.localnames.format(var='fabric'))
0.00s LOADING API: please wait ... 0.76s INFO: USING DATA COMPILED AT: 2014-06-27T12-21-04 1.40s LOGFILE=/Users/dirk/laf-fabric-output/etcbc3/feature-doc/__log__feature-doc.txt 1.40s INFO: DATA LOADED FROM SOURCE etcbc3 AND ANNOX -- FOR TASK feature-doc AT 2014-09-23T08-43-14 0.00s LOADING API: please wait ... 0.00s DETAIL: COMPILING m: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2014-06-27T12-21-04 0.00s DETAIL: COMPILING a: UP TO DATE 0.01s DETAIL: keep main: G.node_anchor_min 0.01s DETAIL: keep main: G.node_anchor_max 0.01s DETAIL: keep main: G.node_sort 0.01s DETAIL: keep main: G.node_sort_inv 0.01s DETAIL: keep main: G.edges_from 0.01s DETAIL: keep main: G.edges_to 0.01s DETAIL: load main: F.shebanq_db_otype [node] 0.69s DETAIL: load main: F.shebanq_ft_clause_atom_relation [node] 0.74s DETAIL: load main: F.shebanq_ft_clause_atom_type [node] 0.78s DETAIL: load main: F.shebanq_ft_clause_constituent_relation [node] 0.83s DETAIL: load main: F.shebanq_ft_domain [node] 0.88s DETAIL: load main: F.shebanq_ft_embedding_domain [node] 0.92s DETAIL: load main: F.shebanq_ft_gender [node] 1.11s DETAIL: load main: F.shebanq_ft_lexical_set [node] 1.31s DETAIL: load main: F.shebanq_ft_locative [node] 1.50s DETAIL: load main: F.shebanq_ft_noun_type [node] 1.70s DETAIL: load main: F.shebanq_ft_number [node] 1.90s DETAIL: load main: F.shebanq_ft_part_of_speech [node] 2.10s DETAIL: load main: F.shebanq_ft_person [node] 2.29s DETAIL: load main: F.shebanq_ft_phrase_dependent_part_of_speech [node] 2.50s DETAIL: load main: F.shebanq_ft_phrase_function [node] 2.61s DETAIL: load main: F.shebanq_ft_phrase_type [node] 2.74s DETAIL: load main: F.shebanq_ft_pronoun_type [node] 2.94s DETAIL: load main: F.shebanq_ft_suffix_gender [node] 3.13s DETAIL: load main: F.shebanq_ft_suffix_number [node] 3.33s DETAIL: load main: F.shebanq_ft_suffix_person [node] 3.53s DETAIL: load main: F.shebanq_ft_tense [node] 3.73s DETAIL: load main: F.shebanq_ft_text_type [node] 3.77s DETAIL: load main: 
F.shebanq_sft_verse_label [node] 3.79s DETAIL: load main: F.shebanq_ft_mother [e] 3.84s DETAIL: load main: F.shebanq_ft_parents [e] 4.09s DETAIL: load main: C.shebanq_ft_mother -> 4.23s DETAIL: load main: C.shebanq_ft_parents -> 5.38s DETAIL: load main: C.shebanq_ft_mother <- 5.51s DETAIL: load main: C.shebanq_ft_parents <- 6.56s INFO: DATA LOADED FROM SOURCE etcbc3 AND ANNOX -- FOR TASK feature-doc AT 2014-09-23T08-43-20
# ETCBC4 configuration. The load log reports the three positional arguments
# as SOURCE ('etcbc4'), ANNOX ('--') and TASK ('feature-doc').
API = fabric.load(
    'etcbc4',
    '--',
    'feature-doc',
    {
        "xmlids": {"node": False, "edge": False},
        # No features are requested in this call; the features to study are
        # listed in the FeatureDoc specification that follows.
        "features": ("", ""),
        "primary": False,
    },
)

# Study specification: which features to document and how to report them.
doc = FeatureDoc(
    fabric,
    {
        'features': {
            # Node feature names, one per line. The string content is kept
            # verbatim, so the names stay at column 0.
            'node': '''
maxmonad
minmonad
monads
oid
otype
clause_kind
code
det
dist
dist_unit
domain
function
g_cons
g_cons_utf8
g_lex
g_lex_utf8
g_nme
g_nme_utf8
g_pfm
g_pfm_utf8
g_prs
g_prs_utf8
g_uvf
g_uvf_utf8
g_vbe
g_vbe_utf8
g_vbs
g_vbs_utf8
g_word
g_word_utf8
gn
is_root
language
lex
lex_utf8
ls
mother_object_type
nme
nu
number
pdp
pfm
prs
ps
rela
sp
st
tab
trailer_utf8
txt
typ
uvf
vbe
vbs
vs
vt
book
chapter
half_verse
label
verse
''',
            # Edge feature names, same layout.
            'edge': '''
mother
distributional_parent
functional_parent
''',
        },
        # Values that are reported as "absence values" instead of defined values.
        'absence_values': {
            'none',
            'unknown',
            'Unkn',
            'NA',
            '?',
            'n/a',
            'absent',
        },
        # NOTE(review): presumably the maximum number of distinct values
        # listed per feature -- confirm against FeatureDoc.
        'VALUE_THRESHOLD': 25,
        # NOTE(review): presumably the feature that supplies verse labels
        # for examples -- confirm against FeatureDoc.
        'vlabel': 'label',
    },
)

# Execute the code template held in fabric.localnames (formatted with
# var='fabric') in the notebook's global namespace.
# NOTE(review): later cells use my_file(), which is not defined in any visible
# cell; presumably this exec provides it -- confirm in the LAF-Fabric docs.
exec(fabric.localnames.format(var='fabric'))
0.00s LOADING API: please wait ... 0.00s INFO: USING DATA COMPILED AT: 2014-07-23T09-31-37 0.77s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4/feature-doc/__log__feature-doc.txt 0.77s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX -- FOR TASK feature-doc AT 2014-10-15T08-25-43 0.00s LOADING API: please wait ... 0.00s DETAIL: COMPILING m: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2014-07-23T09-31-37 0.00s DETAIL: COMPILING a: UP TO DATE 0.01s DETAIL: keep main: G.node_anchor_min 0.01s DETAIL: keep main: G.node_anchor_max 0.01s DETAIL: keep main: G.node_sort 0.01s DETAIL: keep main: G.node_sort_inv 0.01s DETAIL: keep main: G.edges_from 0.01s DETAIL: keep main: G.edges_to 0.01s DETAIL: load main: F.etcbc4_db_maxmonad [node] 1.04s DETAIL: load main: F.etcbc4_db_minmonad [node] 1.92s DETAIL: load main: F.etcbc4_db_monads [node] 2.78s DETAIL: load main: F.etcbc4_db_oid [node] 3.58s DETAIL: load main: F.etcbc4_db_otype [node] 4.39s DETAIL: load main: F.etcbc4_ft_clause_kind [node] 4.43s DETAIL: load main: F.etcbc4_ft_code [node] 4.49s DETAIL: load main: F.etcbc4_ft_det [node] 4.72s DETAIL: load main: F.etcbc4_ft_dist [node] 4.86s DETAIL: load main: F.etcbc4_ft_dist_unit [node] 5.19s DETAIL: load main: F.etcbc4_ft_domain [node] 5.22s DETAIL: load main: F.etcbc4_ft_function [node] 5.37s DETAIL: load main: F.etcbc4_ft_g_cons [node] 5.65s DETAIL: load main: F.etcbc4_ft_g_cons_utf8 [node] 5.96s DETAIL: load main: F.etcbc4_ft_g_lex [node] 6.21s DETAIL: load main: F.etcbc4_ft_g_lex_utf8 [node] 6.53s DETAIL: load main: F.etcbc4_ft_g_nme [node] 6.68s DETAIL: load main: F.etcbc4_ft_g_nme_utf8 [node] 6.87s DETAIL: load main: F.etcbc4_ft_g_pfm [node] 7.02s DETAIL: load main: F.etcbc4_ft_g_pfm_utf8 [node] 7.17s DETAIL: load main: F.etcbc4_ft_g_prs [node] 7.32s DETAIL: load main: F.etcbc4_ft_g_prs_utf8 [node] 7.48s DETAIL: load main: F.etcbc4_ft_g_uvf [node] 7.61s DETAIL: load main: F.etcbc4_ft_g_uvf_utf8 [node] 7.73s DETAIL: load main: F.etcbc4_ft_g_vbe [node] 7.87s DETAIL: load 
main: F.etcbc4_ft_g_vbe_utf8 [node] 8.00s DETAIL: load main: F.etcbc4_ft_g_vbs [node] 8.11s DETAIL: load main: F.etcbc4_ft_g_vbs_utf8 [node] 8.22s DETAIL: load main: F.etcbc4_ft_g_word [node] 8.50s DETAIL: load main: F.etcbc4_ft_g_word_utf8 [node] 8.96s DETAIL: load main: F.etcbc4_ft_gn [node] 9.18s DETAIL: load main: F.etcbc4_ft_is_root [node] 9.23s DETAIL: load main: F.etcbc4_ft_language [node] 9.47s DETAIL: load main: F.etcbc4_ft_lex [node] 9.69s DETAIL: load main: F.etcbc4_ft_lex_utf8 [node] 10s DETAIL: load main: F.etcbc4_ft_ls [node] 10s DETAIL: load main: F.etcbc4_ft_mother_object_type [node] 11s DETAIL: load main: F.etcbc4_ft_nme [node] 11s DETAIL: load main: F.etcbc4_ft_nu [node] 11s DETAIL: load main: F.etcbc4_ft_number [node] 12s DETAIL: load main: F.etcbc4_ft_pdp [node] 12s DETAIL: load main: F.etcbc4_ft_pfm [node] 12s DETAIL: load main: F.etcbc4_ft_prs [node] 12s DETAIL: load main: F.etcbc4_ft_ps [node] 13s DETAIL: load main: F.etcbc4_ft_rela [node] 13s DETAIL: load main: F.etcbc4_ft_sp [node] 13s DETAIL: load main: F.etcbc4_ft_st [node] 14s DETAIL: load main: F.etcbc4_ft_tab [node] 14s DETAIL: load main: F.etcbc4_ft_trailer_utf8 [node] 14s DETAIL: load main: F.etcbc4_ft_txt [node] 14s DETAIL: load main: F.etcbc4_ft_typ [node] 14s DETAIL: load main: F.etcbc4_ft_uvf [node] 15s DETAIL: load main: F.etcbc4_ft_vbe [node] 15s DETAIL: load main: F.etcbc4_ft_vbs [node] 15s DETAIL: load main: F.etcbc4_ft_vs [node] 15s DETAIL: load main: F.etcbc4_ft_vt [node] 16s DETAIL: load main: F.etcbc4_sft_book [node] 16s DETAIL: load main: F.etcbc4_sft_chapter [node] 16s DETAIL: load main: F.etcbc4_sft_half_verse [node] 16s DETAIL: load main: F.etcbc4_sft_label [node] 16s DETAIL: load main: F.etcbc4_sft_verse [node] 16s DETAIL: load main: F.etcbc4_ft_distributional_parent [e] 16s DETAIL: load main: F.etcbc4_ft_functional_parent [e] 16s DETAIL: load main: F.etcbc4_ft_mother [e] 16s DETAIL: load main: C.etcbc4_ft_distributional_parent -> 18s DETAIL: load main: 
C.etcbc4_ft_functional_parent -> 19s DETAIL: load main: C.etcbc4_ft_mother -> 19s DETAIL: load main: C.etcbc4_ft_distributional_parent <- 20s DETAIL: load main: C.etcbc4_ft_functional_parent <- 21s DETAIL: load main: C.etcbc4_ft_mother <- 21s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX -- FOR TASK feature-doc AT 2014-10-15T08-26-04
# ETCBC4b configuration. The load log reports the three positional arguments
# as SOURCE ('etcbc4b'), ANNOX ('--') and TASK ('feature-doc').
API = fabric.load(
    'etcbc4b',
    '--',
    'feature-doc',
    {
        "xmlids": {"node": False, "edge": False},
        # No features are requested in this call; the features to study are
        # listed in the FeatureDoc specification that follows.
        "features": ("", ""),
        "primary": False,
    },
)

# Study specification: which features to document and how to report them.
doc = FeatureDoc(
    fabric,
    {
        'features': {
            # Node feature names, one per line. The string content is kept
            # verbatim, so the names stay at column 0.
            'node': '''
maxmonad
minmonad
monads
oid
otype
kind
code
det
dist
dist_unit
domain
function
g_cons
g_cons_utf8
g_lex
g_lex_utf8
g_nme
g_nme_utf8
g_pfm
g_pfm_utf8
g_prs
g_prs_utf8
g_uvf
g_uvf_utf8
g_vbe
g_vbe_utf8
g_vbs
g_vbs_utf8
g_word
g_word_utf8
gn
is_root
language
lex
lex_utf8
ls
mother_object_type
nme
nu
number
pdp
pfm
prs
ps
rela
sp
st
tab
trailer_utf8
txt
typ
uvf
vbe
vbs
vs
vt
book
chapter
label
verse
''',
            # Edge feature names, same layout.
            'edge': '''
mother
distributional_parent
functional_parent
''',
        },
        # Values that are reported as "absence values" instead of defined values.
        'absence_values': {
            'none',
            'unknown',
            'Unkn',
            'NA',
            '?',
            'n/a',
            'absent',
        },
        # NOTE(review): presumably the maximum number of distinct values
        # listed per feature -- confirm against FeatureDoc.
        'VALUE_THRESHOLD': 25,
        # NOTE(review): presumably the feature that supplies verse labels
        # for examples -- confirm against FeatureDoc.
        'vlabel': 'label',
    },
)

# Execute the code template held in fabric.localnames (formatted with
# var='fabric') in the notebook's global namespace.
# NOTE(review): later cells use my_file(), which is not defined in any visible
# cell; presumably this exec provides it -- confirm in the LAF-Fabric docs.
exec(fabric.localnames.format(var='fabric'))
0.00s LOADING API: please wait ... 0.00s INFO: USING DATA COMPILED AT: 2015-06-29T05-30-49 1.05s LOGFILE=/Users/dirk/SURFdrive/laf-fabric-output/etcbc4b/feature-doc/__log__feature-doc.txt 1.05s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX -- FOR TASK feature-doc AT 2015-10-13T15-00-51 0.00s LOADING API: please wait ... 0.00s DETAIL: COMPILING m: UP TO DATE 0.00s INFO: USING DATA COMPILED AT: 2015-06-29T05-30-49 0.00s DETAIL: COMPILING a: UP TO DATE 0.01s DETAIL: keep main: G.node_anchor_min 0.01s DETAIL: keep main: G.node_anchor_max 0.01s DETAIL: keep main: G.node_sort 0.01s DETAIL: keep main: G.node_sort_inv 0.01s DETAIL: keep main: G.edges_from 0.01s DETAIL: keep main: G.edges_to 0.01s DETAIL: load main: F.etcbc4_db_maxmonad [node] 0.79s DETAIL: load main: F.etcbc4_db_minmonad [node] 1.60s DETAIL: load main: F.etcbc4_db_monads [node] 2.46s DETAIL: load main: F.etcbc4_db_oid [node] 3.24s DETAIL: load main: F.etcbc4_db_otype [node] 3.98s DETAIL: load main: F.etcbc4_ft_code [node] 4.03s DETAIL: load main: F.etcbc4_ft_det [node] 4.27s DETAIL: load main: F.etcbc4_ft_dist [node] 4.44s DETAIL: load main: F.etcbc4_ft_dist_unit [node] 4.76s DETAIL: load main: F.etcbc4_ft_domain [node] 4.78s DETAIL: load main: F.etcbc4_ft_function [node] 4.91s DETAIL: load main: F.etcbc4_ft_g_cons [node] 5.13s DETAIL: load main: F.etcbc4_ft_g_cons_utf8 [node] 5.42s DETAIL: load main: F.etcbc4_ft_g_lex [node] 5.67s DETAIL: load main: F.etcbc4_ft_g_lex_utf8 [node] 6.00s DETAIL: load main: F.etcbc4_ft_g_nme [node] 6.14s DETAIL: load main: F.etcbc4_ft_g_nme_utf8 [node] 6.27s DETAIL: load main: F.etcbc4_ft_g_pfm [node] 6.38s DETAIL: load main: F.etcbc4_ft_g_pfm_utf8 [node] 6.50s DETAIL: load main: F.etcbc4_ft_g_prs [node] 6.61s DETAIL: load main: F.etcbc4_ft_g_prs_utf8 [node] 6.74s DETAIL: load main: F.etcbc4_ft_g_uvf [node] 6.84s DETAIL: load main: F.etcbc4_ft_g_uvf_utf8 [node] 6.98s DETAIL: load main: F.etcbc4_ft_g_vbe [node] 7.22s DETAIL: load main: F.etcbc4_ft_g_vbe_utf8 [node] 7.42s 
DETAIL: load main: F.etcbc4_ft_g_vbs [node] 7.60s DETAIL: load main: F.etcbc4_ft_g_vbs_utf8 [node] 7.78s DETAIL: load main: F.etcbc4_ft_g_word [node] 8.08s DETAIL: load main: F.etcbc4_ft_g_word_utf8 [node] 8.43s DETAIL: load main: F.etcbc4_ft_gn [node] 8.59s DETAIL: load main: F.etcbc4_ft_is_root [node] 8.65s DETAIL: load main: F.etcbc4_ft_kind [node] 8.69s DETAIL: load main: F.etcbc4_ft_language [node] 8.91s DETAIL: load main: F.etcbc4_ft_lex [node] 9.12s DETAIL: load main: F.etcbc4_ft_lex_utf8 [node] 9.39s DETAIL: load main: F.etcbc4_ft_ls [node] 9.59s DETAIL: load main: F.etcbc4_ft_mother_object_type [node] 9.69s DETAIL: load main: F.etcbc4_ft_nme [node] 9.87s DETAIL: load main: F.etcbc4_ft_nu [node] 10s DETAIL: load main: F.etcbc4_ft_number [node] 11s DETAIL: load main: F.etcbc4_ft_pdp [node] 11s DETAIL: load main: F.etcbc4_ft_pfm [node] 11s DETAIL: load main: F.etcbc4_ft_prs [node] 11s DETAIL: load main: F.etcbc4_ft_ps [node] 12s DETAIL: load main: F.etcbc4_ft_rela [node] 12s DETAIL: load main: F.etcbc4_ft_sp [node] 12s DETAIL: load main: F.etcbc4_ft_st [node] 12s DETAIL: load main: F.etcbc4_ft_tab [node] 12s DETAIL: load main: F.etcbc4_ft_trailer_utf8 [node] 13s DETAIL: load main: F.etcbc4_ft_txt [node] 13s DETAIL: load main: F.etcbc4_ft_typ [node] 13s DETAIL: load main: F.etcbc4_ft_uvf [node] 13s DETAIL: load main: F.etcbc4_ft_vbe [node] 13s DETAIL: load main: F.etcbc4_ft_vbs [node] 14s DETAIL: load main: F.etcbc4_ft_vs [node] 14s DETAIL: load main: F.etcbc4_ft_vt [node] 14s DETAIL: load main: F.etcbc4_sft_book [node] 14s DETAIL: load main: F.etcbc4_sft_chapter [node] 14s DETAIL: load main: F.etcbc4_sft_label [node] 14s DETAIL: load main: F.etcbc4_sft_verse [node] 14s DETAIL: load main: F.etcbc4_ft_distributional_parent [e] 15s DETAIL: load main: F.etcbc4_ft_functional_parent [e] 15s DETAIL: load main: F.etcbc4_ft_mother [e] 15s DETAIL: load main: C.etcbc4_ft_distributional_parent -> 16s DETAIL: load main: C.etcbc4_ft_functional_parent -> 17s DETAIL: load 
main: C.etcbc4_ft_mother -> 17s DETAIL: load main: C.etcbc4_ft_distributional_parent <- 17s DETAIL: load main: C.etcbc4_ft_functional_parent <- 18s DETAIL: load main: C.etcbc4_ft_mother <- 18s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX -- FOR TASK feature-doc AT 2015-10-13T15-01-08
# CALAP configuration: same call shape as the other load cells in this
# notebook, with 'calap' as the first argument to fabric.load().
API = fabric.load(
    'calap',
    '--',
    'feature-doc',
    {
        "xmlids": {"node": False, "edge": False},
        # No features are requested in this call; the features to study are
        # listed in the FeatureDoc specification that follows.
        "features": ("", ""),
        "primary": False,
    },
)

# Study specification: which features to document and how to report them.
doc = FeatureDoc(
    fabric,
    {
        'features': {
            # Node feature names, one per line. The string content is kept
            # verbatim, so the names stay at column 0.
            'node': '''
gender
tense
psp
phrase_function
phrase_type
surface_consonants
''',
            # No edge features are studied for this source.
            'edge': '',
        },
        # Values that are reported as "absence values" instead of defined values.
        'absence_values': {
            'none',
            'unknown',
            'Unkn',
        },
        # NOTE(review): presumably the maximum number of distinct values
        # listed per feature -- confirm against FeatureDoc.
        'VALUE_THRESHOLD': 50,
        # NOTE(review): presumably the feature that supplies verse labels
        # for examples -- confirm against FeatureDoc.
        'vlabel': 'verse_label',
    },
)

# Execute the code template held in fabric.localnames (formatted with
# var='fabric') in the notebook's global namespace.
# NOTE(review): later cells use my_file(), which is not defined in any visible
# cell; presumably this exec provides it -- confirm in the LAF-Fabric docs.
exec(fabric.localnames.format(var='fabric'))
If you want to know the set of available features, you can inspect it by:
# Show the available features as a pair: API['F_all'] first, API['FE_all'] second.
# In the captured output the first lists node features per namespace and the
# second lists edge features.
(API['F_all'], API['FE_all'])
([('etcbc4', ['db.maxmonad', 'db.minmonad', 'db.monads', 'db.oid', 'db.otype', 'ft.code', 'ft.det', 'ft.dist', 'ft.dist_unit', 'ft.domain', 'ft.function', 'ft.g_cons', 'ft.g_cons_utf8', 'ft.g_lex', 'ft.g_lex_utf8', 'ft.g_nme', 'ft.g_nme_utf8', 'ft.g_pfm', 'ft.g_pfm_utf8', 'ft.g_prs', 'ft.g_prs_utf8', 'ft.g_uvf', 'ft.g_uvf_utf8', 'ft.g_vbe', 'ft.g_vbe_utf8', 'ft.g_vbs', 'ft.g_vbs_utf8', 'ft.g_word', 'ft.g_word_utf8', 'ft.gn', 'ft.is_root', 'ft.kind', 'ft.language', 'ft.lex', 'ft.lex_utf8', 'ft.ls', 'ft.mother_object_type', 'ft.nme', 'ft.nu', 'ft.number', 'ft.pdp', 'ft.pfm', 'ft.prs', 'ft.ps', 'ft.rela', 'ft.sp', 'ft.st', 'ft.tab', 'ft.trailer_utf8', 'ft.txt', 'ft.typ', 'ft.uvf', 'ft.vbe', 'ft.vbs', 'ft.vs', 'ft.vt', 'sft.book', 'sft.chapter', 'sft.label', 'sft.verse'])], [('etcbc4', ['ft.distributional_parent', 'ft.functional_parent', 'ft.mother']), ('laf', [('', 'x'), ('', 'y')])])
Now generate files with feature data.
For each selected feature, you get a list with values, with for each value its number of occurrences.
The values are divided into two sets: defined values and absence values.
The latter are values such as `none` and `unknown`. You have to provide these values yourself by adapting the `absence_values` set in the study specification above.
# Generate the feature-data files for the features selected in the study
# specification. The captured progress log for this run ends at about 4 minutes.
doc.feature_doc()
39s Looking up feature values ... 51s 100000 nodes done 1m 03s 200000 nodes done 1m 14s 300000 nodes done 1m 24s 400000 nodes done 1m 34s 500000 nodes done 1m 44s 600000 nodes done 1m 55s 700000 nodes done 2m 05s 800000 nodes done 2m 17s 900000 nodes done 2m 27s 1000000 nodes done 2m 38s 1100000 nodes done 2m 50s 1200000 nodes done 3m 02s 1300000 nodes done 3m 13s 1400000 nodes done 3m 17s 1439779 nodes done 3m 17s 100000 edges done 3m 18s 200000 edges done 3m 19s 300000 edges done 3m 19s 400000 edges done 3m 20s 500000 edges done 3m 21s 600000 edges done 3m 21s 700000 edges done 3m 22s 800000 edges done 3m 23s 900000 edges done 3m 23s 1000000 edges done 3m 24s 1100000 edges done 3m 25s 1200000 edges done 3m 25s 1300000 edges done 3m 26s 1400000 edges done 3m 27s 1500000 edges done 3m 27s 1600000 edges done 3m 28s 1700000 edges done 3m 29s 1800000 edges done 3m 29s 1900000 edges done 3m 30s 2000000 edges done 3m 31s 2100000 edges done 3m 31s 2200000 edges done 3m 32s 2224295 edges done 3m 32s Computing results ... 4m 07s Done
There is also a summary file.
For each feature it has the following columns:
- Feature: name of the feature
- val (-), val (+): value of the feature; val (-) contains absence values, val (+) contains defined values
- #vals: number of distinct values of this feature, split between the absence values and the defined values
- occs: number of occurrences of this value or this feature
- clause, phrase, etc.: how often the feature occurs on which type of node
# Read the node and edge summary files; both are tab-separated.
# na_filter=False turns off pandas' missing-value detection, so empty cells
# stay empty strings instead of becoming NaN.
# NOTE(review): my_file() is not defined in any visible cell; presumably it is
# injected by the exec of fabric.localnames -- confirm.
dfn = pandas.read_csv(
    my_file('0_summary_node.csv'),
    sep='\t',
    na_filter=False,
)
dfe = pandas.read_csv(
    my_file('0_summary_edge.csv'),
    sep='\t',
    na_filter=False,
)
# Final expression of the cell: the notebook renders the node summary table.
dfn
Feature | val (-) | val (+) | #vals (-) | #vals (+) | occs (-) | occs (+) | book (-) | book (+) | chapter (-) | chapter (+) | clause (-) | clause (+) | clause_atom (-) | clause_atom (+) | half_verse (-) | half_verse (+) | phrase (-) | phrase (+) | phrase_atom (-) | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | maxmonad | 0 | 426555 | 0 | 1441144 | 0 | 39 | 0 | 929 | 0 | 87978 | 0 | 90144 | 0 | 44682 | 0 | 254664 | 0 | ... | ||
1 | 110165 | 13 | ... | ||||||||||||||||||
2 | 112916 | 13 | ... | ||||||||||||||||||
3 | 212050 | 13 | ... | ||||||||||||||||||
4 | 247832 | 13 | ... | ||||||||||||||||||
5 | 264719 | 13 | ... | ||||||||||||||||||
6 | 293892 | 13 | ... | ||||||||||||||||||
7 | 346911 | 13 | ... | ||||||||||||||||||
8 | 358193 | 13 | ... | ||||||||||||||||||
9 | 406795 | 13 | ... | ||||||||||||||||||
10 | 426038 | 13 | ... | ||||||||||||||||||
11 | 65348 | 13 | ... | ||||||||||||||||||
12 | 102203 | 12 | ... | ||||||||||||||||||
13 | 107354 | 12 | ... | ||||||||||||||||||
14 | 107538 | 12 | ... | ||||||||||||||||||
15 | 109273 | 12 | ... | ||||||||||||||||||
16 | 109671 | 12 | ... | ||||||||||||||||||
17 | 11068 | 12 | ... | ||||||||||||||||||
18 | 110946 | 12 | ... | ||||||||||||||||||
19 | 112074 | 12 | ... | ||||||||||||||||||
20 | 112468 | 12 | ... | ||||||||||||||||||
21 | 112829 | 12 | ... | ||||||||||||||||||
22 | 115118 | 12 | ... | ||||||||||||||||||
23 | 120159 | 12 | ... | ||||||||||||||||||
24 | 125236 | 12 | ... | ||||||||||||||||||
25 | 127439 | 12 | ... | ||||||||||||||||||
26 | 128805 | 12 | ... | ||||||||||||||||||
27 | 133858 | 12 | ... | ||||||||||||||||||
28 | 134232 | 12 | ... | ||||||||||||||||||
29 | 134459 | 12 | ... | ||||||||||||||||||
30 | 134774 | 12 | ... | ||||||||||||||||||
31 | 136224 | 12 | ... | ||||||||||||||||||
32 | 140204 | 12 | ... | ||||||||||||||||||
33 | 152241 | 12 | ... | ||||||||||||||||||
34 | 155124 | 12 | ... | ||||||||||||||||||
35 | 155291 | 12 | ... | ||||||||||||||||||
36 | 160449 | 12 | ... | ||||||||||||||||||
37 | 162051 | 12 | ... | ||||||||||||||||||
38 | 162082 | 12 | ... | ||||||||||||||||||
39 | 164248 | 12 | ... | ||||||||||||||||||
40 | 165411 | 12 | ... | ||||||||||||||||||
41 | 1666 | 12 | ... | ||||||||||||||||||
42 | 167723 | 12 | ... | ||||||||||||||||||
43 | 170273 | 12 | ... | ||||||||||||||||||
44 | 170351 | 12 | ... | ||||||||||||||||||
45 | 172956 | 12 | ... | ||||||||||||||||||
46 | 173067 | 12 | ... | ||||||||||||||||||
47 | 174274 | 12 | ... | ||||||||||||||||||
48 | 174810 | 12 | ... | ||||||||||||||||||
49 | 179188 | 12 | ... | ||||||||||||||||||
50 | 179388 | 12 | ... | ||||||||||||||||||
51 | 426505 MORE | ... | |||||||||||||||||||
52 | minmonad | 0 | 426555 | 0 | 1441144 | 0 | 39 | 0 | 929 | 0 | 87978 | 0 | 90144 | 0 | 44682 | 0 | 254664 | 0 | ... | ||
53 | 252837 | 14 | ... | ||||||||||||||||||
54 | 253615 | 14 | ... | ||||||||||||||||||
55 | 48646 | 14 | ... | ||||||||||||||||||
56 | 107906 | 13 | ... | ||||||||||||||||||
57 | 268198 | 13 | ... | ||||||||||||||||||
58 | 386476 | 13 | ... | ||||||||||||||||||
59 | 386775 | 13 | ... | ||||||||||||||||||
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1976 rows × 31 columns
# Display the edge summary table read from 0_summary_edge.csv.
dfe
Feature | val | #vals | occs | clause->clause | clause->phrase | clause->sentence | clause->word | clause_atom->clause | clause_atom->clause_atom | clause_atom->sentence_atom | phrase->clause | phrase->phrase | phrase->word | phrase_atom->clause_atom | phrase_atom->phrase | phrase_atom->phrase_atom | phrase_atom->word | sentence_atom->sentence | subphrase->subphrase | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | mother | 1 | 176411 | 12462 | 5167 | 0 | 951 | 0 | 89079 | 0 | 5 | 195 | 7 | 0 | 0 | 11717 | 1584 | 0 | 20556 | ... | |
1 | 176411 | ... | |||||||||||||||||||
2 | distributional_parent | 1 | 784664 | 0 | 0 | 0 | 0 | 0 | 0 | 90144 | 0 | 0 | 0 | 267965 | 0 | 0 | 0 | 0 | 0 | ... | |
3 | 784664 | ... | |||||||||||||||||||
4 | functional_parent | 1 | 1194007 | 0 | 0 | 87978 | 0 | 90144 | 0 | 0 | 254664 | 0 | 0 | 0 | 267965 | 0 | 0 | 66701 | 0 | ... | |
5 | 1194007 | ... |
6 rows × 23 columns