#export
from local.torch_basics import *
from local.test import *
from local.core import *
from local.data.all import *
from local.notebook.showdoc import *
#default_exp text.core
#default_cls_lvl 3
Basic functions to preprocess text before assembling it in a `DataBunch`.
#export
import spacy,html
from spacy.symbols import ORTH
The following rules are applied to texts before or after tokenization.
#export
#special tokens
UNK, PAD, BOS, EOS, FLD, TK_REP, TK_WREP, TK_UP, TK_MAJ = "xxunk xxpad xxbos xxeos xxfld xxrep xxwrep xxup xxmaj".split()
#export
_all_ = ["UNK", "PAD", "BOS", "EOS", "FLD", "TK_REP", "TK_WREP", "TK_UP", "TK_MAJ"]
#export
_re_spec = re.compile(r'([/#\\])')
def spec_add_spaces(t):
"Add spaces around / and #"
return _re_spec.sub(r' \1 ', t)
test_eq(spec_add_spaces('#fastai'), ' # fastai')
test_eq(spec_add_spaces('/fastai'), ' / fastai')
test_eq(spec_add_spaces('\\fastai'), ' \\ fastai')
#export
_re_space = re.compile(' {2,}')
def rm_useless_spaces(t):
"Remove multiple spaces"
return _re_space.sub(' ', t)
test_eq(rm_useless_spaces('a  b   c'), 'a b c')
#export
_re_rep = re.compile(r'(\S)(\1{2,})')
def replace_rep(t):
"Replace repetitions at the character level: cccc -- TK_REP 4 c"
def _replace_rep(m):
c,cc = m.groups()
return f' {TK_REP} {len(cc)+1} {c} '
return _re_rep.sub(_replace_rep, t)
Replacement starts at three or more repetitions of the same character.
test_eq(replace_rep('aa'), 'aa')
test_eq(replace_rep('aaaa'), f' {TK_REP} 4 a ')
#export
_re_wrep = re.compile(r'(?:\s|^)(\w+)\s+((?:\1\s+)+)\1(\s|\W|$)')
#hide
"""
Matches any word repeated at least four times with spaces between them
(?:\s|^) Non-Capture either a whitespace character or the beginning of text
(\w+) Capture any alphanumeric character
\s+ One or more whitespace
((?:\1\s+)+) Capture a repetition of one or more times \1 followed by one or more whitespace
\1 Occurence of \1
(\s|\W|$) Capture last whitespace, non alphanumeric character or end of text
""";
#export
def replace_wrep(t):
"Replace word repetitions: word word word word -- TK_WREP 4 word"
def _replace_wrep(m):
c,cc,e = m.groups()
return f' {TK_WREP} {len(cc.split())+2} {c} {e}'
return _re_wrep.sub(_replace_wrep, t)
Replacement starts at three or more repetitions of the same word.
test_eq(replace_wrep('ah ah'), 'ah ah')
test_eq(replace_wrep('ah ah ah'), f' {TK_WREP} 3 ah ')
test_eq(replace_wrep('ah ah ah ah'), f' {TK_WREP} 4 ah ')
test_eq(replace_wrep('ah ah ah ah '), f' {TK_WREP} 4 ah ')
test_eq(replace_wrep('ah ah ah ah.'), f' {TK_WREP} 4 ah .')
test_eq(replace_wrep('ah ah ahi'), f'ah ah ahi')
#export
def fix_html(x):
"Various messy things we've seen in documents"
x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace('nbsp;', ' ').replace(
'#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace('<br />', "\n").replace(
'\\"', '"').replace('<unk>',UNK).replace(' @.@ ','.').replace(' @-@ ','-').replace('...',' …')
return html.unescape(x)
test_eq(fix_html('#39;bli#146;'), "'bli'")
test_eq(fix_html('Sarah amp; Duck...'), 'Sarah & Duck …')
test_eq(fix_html('a nbsp; #36;'), 'a   $')
test_eq(fix_html('\\" <unk>'), f'" {UNK}')
test_eq(fix_html('quot;  @.@  @-@ '), "' .-")
test_eq(fix_html('<br />text\\n'), '\ntext\n')
#export
_re_all_caps = re.compile(r'(\s|^)([A-Z]+[^a-z\s]*)(?=(\s|$))')
#hide
"""
Catches any word in all caps, even with ' or - inside
(\s|^) Capture either a whitespace or the beginning of text
([A-Z]+ Capture one capitalized letter or more...
[^a-z\s]*) ...followed by anything that's non lowercase or whitespace
(?=(\s|$)) Look ahead for a space or end of text
""";
#export
def replace_all_caps(t):
"Replace tokens in ALL CAPS by their lower version and add `TK_UP` before."
def _replace_all_caps(m):
tok = f'{TK_UP} ' if len(m.groups()[1]) > 1 else ''
return f"{m.groups()[0]}{tok}{m.groups()[1].lower()}"
return _re_all_caps.sub(_replace_all_caps, t)
test_eq(replace_all_caps("I'M SHOUTING"), f"{TK_UP} i'm {TK_UP} shouting")
test_eq(replace_all_caps("I'm speaking normally"), "I'm speaking normally")
test_eq(replace_all_caps("I am speaking normally"), "i am speaking normally")
#export
_re_maj = re.compile(r'(\s|^)([A-Z][^A-Z\s]*)(?=(\s|$))')
#hide
"""
Catches any capitalized word
(\s|^) Capture either a whitespace or the beginning of text
([A-Z] Capture exactly one capitalized letter...
[^A-Z\s]*) ...followed by anything that's not uppercase or whitespace
(?=(\s|$)) Look ahead for a space of end of text
""";
#export
def replace_maj(t):
"Replace tokens in ALL CAPS by their lower version and add `TK_UP` before."
def _replace_maj(m):
tok = f'{TK_MAJ} ' if len(m.groups()[1]) > 1 else ''
return f"{m.groups()[0]}{tok}{m.groups()[1].lower()}"
return _re_maj.sub(_replace_maj, t)
test_eq(replace_maj("Jeremy Howard"), f'{TK_MAJ} jeremy {TK_MAJ} howard')
test_eq(replace_maj("I don't think there is any maj here"), ("i don't think there is any maj here"),)
#export
def lowercase(t, add_bos=True, add_eos=False):
"Converts `t` to lowercase"
return (f'{BOS} ' if add_bos else '') + t.lower().strip() + (f' {EOS}' if add_eos else '')
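A couple of quick checks of the default behavior (small examples added for illustration):
test_eq(lowercase("Hello"), f'{BOS} hello')
test_eq(lowercase("Hello", add_eos=True), f'{BOS} hello {EOS}')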
#export
def replace_space(t):
"Replace embedded spaces in a token with unicode line char to allow for split/join"
return t.replace(' ', '▁')
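For example (illustrative check):
test_eq(replace_space('a b'), 'a▁b')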
#export
defaults.text_spec_tok = [UNK, PAD, BOS, EOS, FLD, TK_REP, TK_WREP, TK_UP, TK_MAJ]
defaults.text_proc_rules = [fix_html, replace_rep, replace_wrep, spec_add_spaces, rm_useless_spaces,
replace_all_caps, replace_maj, lowercase]
defaults.text_postproc_rules = [replace_space]
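To get a feel for the combined effect of the default processing rules, we can compose them and apply the result to a sample string (purely illustrative: we just print the result and expect to see the `xxbos`, `xxmaj`, `xxup`, `xxrep` and `xxwrep` special tokens appear):
print(compose(*defaults.text_proc_rules)("Anyone AWAKE???  yes yes yes yes!"))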
A tokenizer is a class that must implement `__call__`. This method receives a generator of texts and must return a generator with their tokenized versions. Here is the most basic example:
#export
class BaseTokenizer():
"Basic tokenizer that just splits on spaces"
def __init__(self, split_char=' ', **kwargs): self.split_char=split_char
def __call__(self, items): return (t.split(self.split_char) for t in items)
tok = BaseTokenizer()
for t in tok(["This is a text"]): test_eq(t, ["This", "is", "a", "text"])
tok = BaseTokenizer('x')
for t in tok(["This is a text"]): test_eq(t, ["This is a te", "t"])
#export
class SpacyTokenizer():
"Spacy tokenizer for `lang`"
def __init__(self, lang='en', special_toks=None, buf_sz=5000):
special_toks = ifnone(special_toks, defaults.text_spec_tok)
nlp = spacy.blank(lang, disable=["parser", "tagger", "ner"])
for w in special_toks: nlp.tokenizer.add_special_case(w, [{ORTH: w}])
self.pipe,self.buf_sz = nlp.pipe,buf_sz
def __call__(self, items):
return (L(doc).attrgot('text') for doc in self.pipe(items, batch_size=self.buf_sz))
tok = SpacyTokenizer()
inp,exp = "This isn't the easiest text.",["This", "is", "n't", "the", "easiest", "text", "."]
test_eq(L(tok([inp]*5)), [exp]*5)
#export
class TokenizeBatch:
"A wrapper around `tok_func` to apply `rules` and tokenize in parallel"
def __init__(self, tok_func=SpacyTokenizer, rules=None, post_rules=None, **tok_kwargs ):
self.rules = L(ifnone(rules, defaults.text_proc_rules))
self.post_f = compose(*L(ifnone(post_rules, defaults.text_postproc_rules)))
self.tok = tok_func(**tok_kwargs)
def __call__(self, batch):
return (L(o).map(self.post_f) for o in self.tok(maps(*self.rules, batch)))
f = TokenizeBatch()
test_eq(f(["This isn't a problem"]), [[BOS, TK_MAJ, 'this', 'is', "n't", 'a', 'problem']])
f = TokenizeBatch(BaseTokenizer, rules=[], split_char="'")
test_eq(f(["This isn't a problem"]), [['This▁isn', 't▁a▁problem']])
This is what gets called during each of the processes handling tokenization. It creates an instance of a tokenizer with `tok_func` and `tok_kwargs` at init, then iterates through the `batch` of texts, applies the `rules` to them and tokenizes them.
texts = ["this is a text", "this is another text"]
tok = TokenizeBatch(BaseTokenizer, texts.__getitem__)
test_eq([t for t in tok([0,1])],[['this', 'is', 'a', 'text'], ['this', 'is', 'another', 'text']])
#export
def tokenize1(text, tok_func=SpacyTokenizer, rules=None, post_rules=None, **tok_kwargs):
"Tokenize one `text` with an instance of `tok_func` and some `rules`"
return first(TokenizeBatch(tok_func, rules, post_rules, **tok_kwargs)([text]))
test_eq(tokenize1("This isn't a problem"),
[BOS, TK_MAJ, 'this', 'is', "n't", 'a', 'problem'])
test_eq(tokenize1("This isn't a problem", BaseTokenizer, rules=[], split_char="'"),
['This▁isn', 't▁a▁problem'])
#export
def parallel_tokenize(items, tok_func, rules, as_gen=False, n_workers=defaults.cpus, **tok_kwargs):
"Calls a potential setup on `tok_func` before launching `TokenizeBatch` in parallel"
if hasattr(tok_func, 'setup'): tok_kwargs = tok_func(**tok_kwargs).setup(items, rules)
return parallel_gen(TokenizeBatch, items, as_gen=as_gen, tok_func=tok_func,
rules=rules, n_workers=n_workers, **tok_kwargs)
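A minimal direct use of `parallel_tokenize`, mirroring how `tokenize_df` restores the original order of the results (added for illustration; we pass `n_workers=2` to keep the example small):
texts = ["this is a text", "this is another text"]
toks = L(parallel_tokenize(texts, BaseTokenizer, rules=[], n_workers=2)).sorted().itemgot(1)
test_eq(toks, [['this', 'is', 'a', 'text'], ['this', 'is', 'another', 'text']])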
Preprocessing function for texts in filenames. Tokenized texts will be saved in a similar fashion in a directory suffixed with `_tok` in the parent folder of `path` (override with `output_dir`).
#export
fn_counter_pkl = 'counter.pkl'
#export
def tokenize_folder(path, extensions=None, folders=None, output_dir=None, n_workers=defaults.cpus,
rules=None, tok_func=SpacyTokenizer, encoding='utf8', **tok_kwargs):
"Tokenize text files in `path` in parallel using `n_workers`"
path,extensions = Path(path),ifnone(extensions, ['.txt'])
fnames = get_files(path, extensions=extensions, recurse=True, folders=folders)
output_dir = Path(ifnone(output_dir, path.parent/f'{path.name}_tok'))
rules = partial(Path.read, encoding=encoding) + L(ifnone(rules, defaults.text_proc_rules.copy()))
counter = Counter()
for i,tok in parallel_tokenize(fnames, tok_func, rules, as_gen=True, n_workers=n_workers, **tok_kwargs):
out = output_dir/fnames[i].relative_to(path)
out.write(' '.join(tok))
counter.update(tok)
(output_dir/fn_counter_pkl).save(counter)
The result will be in `output_dir` (defaults to a folder in the same parent directory as `path`, with `_tok` added to `path.name`) with the same structure as in `path`. Tokenized texts for a given file will be in the file having the same name in `output_dir`, and the counts of all words are stored in `output_dir/counter.pkl`.
`extensions` defaults to `['.txt']`, and all text files in `path` are treated unless you specify a list of `folders`. `tok_func` is instantiated in each process with `tok_kwargs`, and `rules` (which default to `defaults.text_proc_rules`) are applied to each text before it goes into the tokenizer.
#export
def _join_texts(df, mark_fields=False):
"Join texts in row `idx` of `df`, marking each field with `FLD` if `mark_fields=True`"
text_col = (f'{FLD} {1} ' if mark_fields else '' ) + df.iloc[:,0].astype(str)
for i in range(1,len(df.columns)):
text_col += (f' {FLD} {i+1} ' if mark_fields else ' ') + df.iloc[:,i].astype(str)
return text_col.values
#hide
texts = [f"This is an example of text {i}" for i in range(10)]
df = pd.DataFrame({'text': texts, 'text1': texts}, columns=['text', 'text1'])
col = _join_texts(df, mark_fields=True)
for i in range(len(df)):
test_eq(col[i], f'{FLD} 1 This is an example of text {i} {FLD} 2 This is an example of text {i}')
#export
def tokenize_df(df, text_cols, n_workers=defaults.cpus, rules=None, mark_fields=None,
tok_func=SpacyTokenizer, **tok_kwargs):
"Tokenize texts in `df[text_cols]` in parallel using `n_workers`"
text_cols = L(text_cols)
#mark_fields defaults to False if there is one column of texts, True if there are multiple
if mark_fields is None: mark_fields = len(text_cols)>1
rules = L(ifnone(rules, defaults.text_proc_rules.copy()))
texts = _join_texts(df[text_cols], mark_fields=mark_fields)
outputs = L(parallel_tokenize(texts, tok_func, rules, n_workers=n_workers, **tok_kwargs)
).sorted().itemgot(1)
other_cols = df.columns[~df.columns.isin(text_cols)]
res = df[other_cols].copy()
res['text'] = outputs
return res,Counter(outputs.concat())
This function returns a new dataframe with the same non-text columns and a column named `text` that contains the tokenized texts. It also returns a counter of all the words seen, to quickly build a vocabulary afterward.
`tok_func` is instantiated in each process with `tok_kwargs`, and `rules` (which default to `defaults.text_proc_rules`) are applied to each text before it goes into the tokenizer. If `mark_fields` isn't specified, it defaults to `False` when there is a single text column, `True` when there are several. In that case, the texts in each of those columns are joined with `FLD` markers followed by the number of the field, as in the example below.
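For instance, with two text columns and `mark_fields` left to its default (hence `True`), each field is prefixed with `FLD` and its index (a quick illustration, not one of the original tests):
df = pd.DataFrame({'text': ["This is a text"], 'text1': ["This is another text"], 'label': [0]})
out,cnt = tokenize_df(df, text_cols=['text', 'text1'])
test_eq(out['text'][0], [BOS, FLD, '1', TK_MAJ, 'this', 'is', 'a', 'text', FLD, '2', TK_MAJ, 'this', 'is', 'another', 'text'])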
#export
def tokenize_csv(fname, text_cols, outname=None, n_workers=4, rules=None, mark_fields=None,
tok_func=SpacyTokenizer, header='infer', chunksize=50000, **tok_kwargs):
"Tokenize texts in the `text_cols` of the csv `fname` in parallel using `n_workers`"
df = pd.read_csv(fname, header=header, chunksize=chunksize)
outname = Path(ifnone(outname, fname.parent/f'{fname.stem}_tok.csv'))
cnt = Counter()
for i,dfp in enumerate(df):
out,c = tokenize_df(dfp, text_cols, n_workers=n_workers, rules=rules,
mark_fields=mark_fields, tok_func=tok_func, **tok_kwargs)
out.text = out.text.str.join(' ')
out.to_csv(outname, header=(None,header)[i==0], index=False, mode=('a','w')[i==0])
cnt.update(c)
outname.with_suffix('.pkl').save(cnt)
#export
def load_tokenized_csv(fname):
"Utility function to quickly load a tokenized csv ans the corresponding counter"
fname = Path(fname)
out = pd.read_csv(fname)
for txt_col in out.columns[1:-1]:
out[txt_col] = out[txt_col].str.split(' ')
return out,fname.with_suffix('.pkl').load()
The result will be written in a new csv file in `outname` (defaults to the same as `fname` with the suffix `_tok.csv`). It will have the same header as the original file, the same non-text columns, and a `text` column as described in `tokenize_df`.
`tok_func` is instantiated in each process with `tok_kwargs`, and `rules` (which default to `defaults.text_proc_rules`) are applied to each text before it goes into the tokenizer. If `mark_fields` isn't specified, it defaults to `False` when there is a single text column, `True` when there are several. In that case, the texts in each of those columns are joined with `FLD` markers followed by the number of the field.
The csv file is opened with `header` and, optionally, read in chunks of `chunksize` rows at a time. If this argument is passed, each chunk is processed independently and saved to the output file, to limit memory usage.
def _prepare_texts(tmp_d):
"Prepare texts in a folder struct in tmp_d, a csv file and returns a dataframe"
path = Path(tmp_d)/'tmp'
path.mkdir()
for d in ['a', 'b', 'c']:
(path/d).mkdir()
for i in range(5):
with open(path/d/f'text{i}.txt', 'w') as f: f.write(f"This is an example of text {d} {i}")
texts = [f"This is an example of text {d} {i}" for i in range(5) for d in ['a', 'b', 'c']]
df = pd.DataFrame({'text': texts, 'label': list(range(15))}, columns=['text', 'label'])
csv_fname = tmp_d/'input.csv'
df.to_csv(csv_fname, index=False)
return path,df,csv_fname
with tempfile.TemporaryDirectory() as tmp_d:
path,df,csv_fname = _prepare_texts(Path(tmp_d))
#Tokenize as folders
tokenize_folder(path)
outp = Path(tmp_d)/'tmp_tok'
for d in ['a', 'b', 'c']:
p = outp/d
for i in range(5):
test_eq((p/f'text{i}.txt').read(), ' '.join([
BOS, TK_MAJ, 'this', 'is', 'an', 'example', 'of', 'text', d, str(i) ]))
cnt_a = (outp/fn_counter_pkl).load()
test_eq(cnt_a['this'], 15)
test_eq(cnt_a['a'], 5)
test_eq(cnt_a['0'], 3)
#Tokenize as a dataframe
out,cnt_b = tokenize_df(df, text_cols='text')
test_eq(list(out.columns), ['label', 'text'])
test_eq(out['label'].values, df['label'].values)
test_eq(out['text'], [(outp/d/f'text{i}.txt').read().split(' ') for i in range(5) for d in ['a', 'b', 'c']])
test_eq(cnt_a, cnt_b)
#Tokenize as a csv
out_fname = Path(tmp_d)/'output.csv'
tokenize_csv(csv_fname, text_cols='text', outname=out_fname)
test_eq((out,cnt_b), load_tokenized_csv(out_fname))
eu_langs = ["bg", "cs", "da", "de", "el", "en", "es", "et", "fi", "fr", "ga", "hr", "hu",
"it","lt","lv","mt","nl","pl","pt","ro","sk","sl","sv"] # all European langs
#export
class SentencePieceTokenizer():#TODO: pass the special tokens symbol to sp
"Spacy tokenizer for `lang`"
def __init__(self, lang='en', special_toks=None, sp_model=None, vocab_sz=None, max_vocab_sz=30000,
model_type='unigram', char_coverage=None, cache_dir='tmp'):
try: from sentencepiece import SentencePieceTrainer,SentencePieceProcessor
except ImportError:
raise Exception('sentencepiece module is missing: run `pip install sentencepiece`')
self.sp_model,self.cache_dir = sp_model,Path(cache_dir)
self.vocab_sz,self.max_vocab_sz,self.model_type = vocab_sz,max_vocab_sz,model_type
self.char_coverage = ifnone(char_coverage, 0.99999 if lang in eu_langs else 0.9998)
self.special_toks = ifnone(special_toks, defaults.text_spec_tok)
if sp_model is None: self.tok = None
else:
self.tok = SentencePieceProcessor()
self.tok.Load(str(sp_model))
os.makedirs(self.cache_dir, exist_ok=True)
def _get_vocab_sz(self, raw_text_path):
cnt = Counter()
with open(raw_text_path, 'r') as f:
for line in f.readlines():
cnt.update(line.split())
if len(cnt)//4 > self.max_vocab_sz: return self.max_vocab_sz
res = len(cnt)//4
while res%8 != 0: res+=1
return res
def train(self, raw_text_path):
"Train a sentencepiece tokenizer on `texts` and save it in `path/tmp_dir`"
from sentencepiece import SentencePieceTrainer
vocab_sz = self._get_vocab_sz(raw_text_path) if self.vocab_sz is None else self.vocab_sz
spec_tokens = ['\u2581'+s for s in self.special_toks]
SentencePieceTrainer.Train(" ".join([
f"--input={raw_text_path} --vocab_size={vocab_sz} --model_prefix={self.cache_dir/'spm'}",
f"--character_coverage={self.char_coverage} --model_type={self.model_type}",
f"--unk_id={len(spec_tokens)} --pad_id=-1 --bos_id=-1 --eos_id=-1",
f"--user_defined_symbols={','.join(spec_tokens)}"]))
raw_text_path.unlink()
return self.cache_dir/'spm.model'
def setup(self, items, rules):
if self.tok is not None: return {'sp_model': self.sp_model}
raw_text_path = self.cache_dir/'texts.out'
with open(raw_text_path, 'w') as f:
for t in progress_bar(maps(*rules, items), total=len(items), leave=False):
f.write(f'{t}\n')
return {'sp_model': self.train(raw_text_path)}
def __call__(self, items):
for t in items: yield self.tok.EncodeAsPieces(t)
texts = [f"This is an example of text {i}" for i in range(10)]
df = pd.DataFrame({'text': texts, 'label': list(range(10))}, columns=['text', 'label'])
out,cnt = tokenize_df(df, text_cols='text', tok_func=SentencePieceTokenizer, vocab_sz=34)
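The exact subword pieces depend on the model that was just trained, so we only do a light sanity check here (illustrative):
test_eq(len(out), len(df))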
#hide
from local.notebook.export import notebook2script
notebook2script(all_fs=True)