Notebook

In [ ]:

# export
from nbdev.imports import *
from nbdev.sync import *
from nbdev.export import *
from nbdev.showdoc import *
from nbdev.template import *

from html.parser import HTMLParser
from nbconvert.preprocessors import ExecutePreprocessor, Preprocessor
from nbconvert import HTMLExporter,MarkdownExporter
import traitlets

In [ ]:

# default_exp export2html
# default_cls_lvl 3

Convert to html¶

The functions that transform the dev notebooks in the documentation of the library

toc: true

The most important function defined in this module is notebook2html, so you may want to jump to it before scrolling through the rest, which explains the details behind the scenes of the conversion from notebooks to the html documentation. The main things to remember are:

put a #hide flag at the top of any cell you want to completely hide in the docs
use the hide input jupyter extension to hide the input of some cells (by default all show_doc cells have that marker added)
you can define some jekyll metadata in the markdown cell with the title, see get_metadata
use backticks for terms you want automatic links to be found, but use <code> and </code> when you have homonyms and don't want those links
you can define the default toc level of classes with # default_cls_lvl flag followed by a number (default is 2)
you can add jekyll warnings, important or note banners with appropriate block quotes (see add_jekyll_notes)
put any images you want to use in the images folder of your notebook folder, they will be automatically copied over to the docs folder

Preprocessing notebook¶

Cell processors¶

In [ ]:

#export
class HTMLParseAttrs(HTMLParser):
    "Simple HTML parser which stores any attributes in `attrs` dict"
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Start from a known state so `__call__` and `attrs2str` also work when no tag gets parsed
        self.tag,self.attrs = None,{}

    def handle_starttag(self, tag, attrs):
        "Remember the name and the attributes of the last start tag seen"
        self.tag,self.attrs = tag,dict(attrs)

    def attrs2str(self):
        "Attrs as string"
        return ' '.join([f'{k}="{v}"' for k,v in self.attrs.items()])

    def show(self):
        "Tag with updated attrs"
        return f'<{self.tag} {self.attrs2str()} />'

    def __call__(self, s):
        "Parse `s` and store attrs"
        self.feed(s)
        # The dict itself is returned (not a copy): editing it changes what `show` renders
        return self.attrs

In [ ]:

# Calling the parser on a tag returns its `attrs` dict
h = HTMLParseAttrs()
t = h('<img src="src" alt="alt" width="700" caption="cap" />')
test_eq(t['width'], '700')
test_eq(t['src'  ], 'src')

In [ ]:

# `t` is the parser's own `attrs` dict, so an edit to it shows up in `h.show()`
t['width'] = '600'
test_eq(h.show(), '<img src="src" alt="alt" width="600" caption="cap" />')

In [ ]:

# A key that is popped and re-added under a new name moves to the end of the rendered attributes
t['max-width'] = t.pop('width')
test_eq(h.show(), '<img src="src" alt="alt" caption="cap" max-width="600" />')

The following functions are applied on individual cells as a preprocessing step before the conversion to html.

In [ ]:

#export
def remove_widget_state(cell):
    "Remove widgets in the output of `cell`"
    if cell['cell_type'] == 'code' and 'outputs' in cell:
        # Key lookup rather than `.data`, so plain dicts work as well as notebook nodes
        cell['outputs'] = [o for o in cell['outputs']
                           if not ('data' in o and 'application/vnd.jupyter.widget-view+json' in o['data'])]
    return cell

Those outputs usually can't be rendered properly in html.

In [ ]:

#export
# Matches any cell that has a `show_doc` call in it, or an `#export` or `#hide_input` flag
# (each flag has to be followed by whitespace, so `#exports` is not matched)
_re_cell_to_hide = r'\s*show_doc\(|^\s*#\s*export\s+|^\s*#\s*hide_input\s+'

In [ ]:

#export
def hide_cells(cell):
    "Flag `cell` so that its input is hidden when it matches `_re_cell_to_hide`"
    needs_hiding = check_re(cell, _re_cell_to_hide)
    if needs_hiding:
        # Any metadata already on the cell is replaced, not merged
        cell['metadata'] = {'hide_input': True}
    return cell

This concerns all the cells with a # export flag and all the cells containing a show_doc for a function or class.

In [ ]:

# Each of these three sources must come back with `hide_input` switched on
for source in ['show_doc(read_nb)', '# export\nfrom local.core import *', '# hide_input\n2+2']:
    cell = {'cell_type': 'code', 'source': source}
    cell1 = hide_cells(cell.copy())
    assert 'metadata' in cell1
    assert 'hide_input' in cell1['metadata']
    assert cell1['metadata']['hide_input']

# A cell flagged with the plural form is not matched and comes back unchanged
cell = {'cell_type': 'code', 'source': '# exports\nfrom local.core import *'}
test_eq(hide_cells(cell.copy()), cell)

In [ ]:

#export
# Matches an `#exports` flag line, together with its line break, at the very start of the text
# (no re.MULTILINE, and no whitespace allowed before the `#`)
_re_exports = re.compile(r'^#\s*exports[^\n]*\n')

In [ ]:

#export
def clean_exports(cell):
    "Strip the `#exports` flag line from the source of `cell`"
    without_flag = _re_exports.sub('', cell['source'])
    cell['source'] = without_flag
    return cell

The rest of the cell is displayed without any modification.

In [ ]:

# The flag line is dropped whether or not extra text follows the flag
cell = {'cell_type': 'code', 'source': '# exports\nfrom local.core import *'}
test_eq(clean_exports(cell.copy()), {'cell_type': 'code', 'source': 'from local.core import *'})
cell = {'cell_type': 'code', 'source': '# exports core\nfrom local.core import *'}
test_eq(clean_exports(cell.copy()), {'cell_type': 'code', 'source': 'from local.core import *'})

In [ ]:

#export
def treat_backticks(cell):
    "Run `add_doc_links` over the source of `cell` when it is a markdown cell"
    if cell['cell_type'] != 'markdown': return cell
    cell['source'] = add_doc_links(cell['source'])
    return cell

In [ ]:

# The backticked name is expected to be wrapped in a link to its doc page
cell = {'cell_type': 'markdown', 'source': 'This is a `DocsTestClass`'}
test_eq(treat_backticks(cell), {'cell_type': 'markdown',
    'source': 'This is a [`DocsTestClass`](/export#DocsTestClass)'})

In [ ]:

#export
# A negative lookahead is used to skip remote links: a character class such as `[^http]` would
# instead reject every local notebook whose name starts with `h`, `t` or `p`.
_re_nb_link = re.compile(r"""
# Catches any link to a local notebook and keeps the title in group 1, the link without .ipynb in group 2
\[              # Opening [
([^\]]*)        # Catching group for any character except ]
\]\(            # Closing ], opening (
((?!https?://)  # Catching group that must not begin with http:// or https:// (local notebook)
[^\)]+)         # and containing anything but )
\.ipynb\)       # .ipynb and closing )
""", re.VERBOSE)

In [ ]:

#export
# Note on what a match consumes: the leading `\s*` and the final newline are part of the match,
# and the title group `[^:]*` is free to span several lines.
_re_block_notes = re.compile(r"""
# Catches any pattern > Title: content with title in group 1 and content in group 2
^\s*>\s*     # > followed by any number of whitespace
([^:]*)      # Catching group for any character but :
:\s*         # : then any number of whitespace
([^\n]*)     # Catching group for anything but a new line character
(?:\n|$)     # Non-catching group for either a new line or the end of the text
""", re.VERBOSE | re.MULTILINE)

In [ ]:

#export
def _to_html(text):
    "Swap every straight single quote in `text` for the `&#8217;` entity"
    quote,entity = "'","&#8217;"
    return text.replace(quote, entity)

In [ ]:

#export
def add_jekyll_notes(cell):
    "Convert block quotes to jekyll notes in `cell`"
    styles = Config().get('jekyll_styles', 'note,warning,tip,important').split(',')
    def _inner(m):
        title,text = m.groups()
        # Not a configured style: hand back the matched text untouched. Rebuilding it as
        # "> title: text" would drop the whitespace and the line break the pattern consumed.
        if title.lower() not in styles: return m.group(0)
        return '{% include '+title.lower()+".html content=\'"+_to_html(text)+"\' %}"
    # Only markdown cells can hold block quotes
    if cell['cell_type'] == 'markdown':
        cell['source'] = _re_block_notes.sub(_inner, cell['source'])
    return cell

Supported styles are Warning, Note, Tip and Important:

Typing > Warning: There will be no second warning! will render in the docs:

Warning: There will be no second warning!

Typing > Important: Pay attention! It's important. will render in the docs:

Important: Pay attention! It's important.

Typing > Tip: This is my tip. will render in the docs:

Tip: This is my tip.

Typing > Note: Take note of this. will render in the docs:

Note: Take note of this.

Typing > Note: A doc link to `add_jekyll_notes` should also work fine. will render in the docs:

Note: A doc link to add_jekyll_notes should also work fine.

In [ ]:

#hide
# The four supported titles become includes; the unknown 'Bla' title must leave the cell as it was
for w in ['Warning', 'Note', 'Important', 'Tip', 'Bla']:
    cell = {'cell_type': 'markdown', 'source': f"> {w}: This is my final {w.lower()}!"}
    res = '{% include '+w.lower()+'.html content=\'This is my final '+w.lower()+'!\' %}'
    if w != 'Bla': test_eq(add_jekyll_notes(cell), {'cell_type': 'markdown', 'source': res})
    else: test_eq(add_jekyll_notes(cell), cell)

In [ ]:

#export
# Four groups in total: 1-3 are set for the markdown form (opening part, image path, closing
# parenthesis) and group 4 for the html form; the groups of the other form are None.
_re_image = re.compile(r"""
# Catches any image file used, either with `![alt](image_file)` or `<img src="image_file">`
^(!\[           #   Beginning of line (since re.MULTILINE is passed) followed by ![ in a catching group
[^\]]*          #   Anything but ]
\]\()           #   Closing ] and opening (, end of the first catching group
([^\)]*)        #   Catching block with any character but )
(\))            #   Catching group with closing )
|               # OR
^(<img\ [^>]*>)  #   Catching group with <img some_html_code>
""", re.MULTILINE | re.VERBOSE)

# Html form only: an `<img ...>` tag at the beginning of a line
_re_image1 = re.compile(r"^<img\ [^>]*>", re.MULTILINE)

In [ ]:

#export
def _img2jkl(d, h, jekyll=True):
    "Render the attrs of `h` as a jekyll image include, or as a plain `<img>` tag if not `jekyll`"
    if jekyll:
        # Rename the keys of `d` in place before the attributes are rendered
        for old,new in (('width', 'max-width'), ('src', 'file')):
            if old in d: d[new] = d.pop(old)
        return '{% include image.html ' + h.attrs2str() + ' %}'
    return '<img ' + h.attrs2str() + '>'

In [ ]:

#export
def copy_images(cell, fname, dest, jekyll=True):
    "Copy images referenced in `cell` from `fname` parent folder to `dest` folder"
    def _rep_src(m):
        grps = m.groups()
        # Group 3 is set for an `<img ...>` tag, groups 0-2 for the `![alt](src)` form
        is_tag = grps[3] is not None
        if is_tag:
            h = HTMLParseAttrs()
            dic = h(grps[3])
            src = dic['src']
        else: src = grps[1]
        if not src.startswith(('http://', 'https://')):
            # Local file: create the target folder only in this case, so that a remote url
            # never ends up as a stray `http:`/`https:` folder inside `dest`
            os.makedirs((Path(dest)/src).parent, exist_ok=True)
            shutil.copy(Path(fname).parent/src, Path(dest)/src)
            src = Config().doc_baseurl + src
        if is_tag:
            dic['src'] = src
            return _img2jkl(dic, h, jekyll=jekyll)
        return f"{grps[0]}{src}{grps[2]}"
    # Same guard as `adapt_img_path`: only markdown cells are searched for image references
    if cell['cell_type'] == 'markdown': cell['source'] = _re_image.sub(_rep_src, cell['source'])
    return cell

This is to ensure that all images defined in nbs_folder/images and used in notebooks are copied over to doc_folder/images.

In [ ]:

# A local markdown image: the file must be copied and the link rewritten
dest_img = Config().doc_path/'images'/'logo.png'
cell = {'cell_type': 'markdown', 'source':'Text\n![Alt](images/logo.png)'}
try:
    copy_images(cell, Path('01_export.ipynb'), Config().doc_path)
    test_eq(cell["source"], 'Text\n![Alt](/images/logo.png)')
    #Image has been copied
    assert dest_img.exists()
# Remove the copied file again, whether or not the checks passed
finally: dest_img.unlink()

In [ ]:

#hide
# A remote markdown image must leave the source untouched
cell = {'cell_type': 'markdown', 'source':'Text\n![Alt](https://site.logo.png)'}
copy_images(cell, Path('01_export.ipynb'), Config().doc_path)
test_eq(cell["source"], 'Text\n![Alt](https://site.logo.png)')

In [ ]:

#hide
# A local `<img>` tag: the file is copied and the tag is rendered as a jekyll include
cell = {'cell_type': 'markdown', 'source': 'Text\n<img src="images/logo.png" alt="alt" width="600" caption="cap" />'}
try:
    copy_images(cell, Path('01_export.ipynb'), Config().doc_path)
    test_eq(cell["source"], 'Text\n{% include image.html alt="alt" caption="cap" max-width="600" file="/images/logo.png" %}')
    assert dest_img.exists()
# Remove the copied file again, whether or not the checks passed
finally: dest_img.unlink()

In [ ]:

#hide
# A remote `<img>` tag keeps its url; only the tag itself is turned into an include
cell = {'cell_type': 'markdown', 'source': 'Text\n<img src="http://site.logo.png" alt="alt" width="600" caption="cap" />'}
copy_images(cell, Path('01_export.ipynb'), Config().doc_path)
test_eq(cell["source"], 'Text\n{% include image.html alt="alt" caption="cap" max-width="600" file="http://site.logo.png" %}')

In [ ]:

#export
def _relative_to(path1, path2):
    "Path leading from folder `path2` to `path1`, as a string using `..` for each step up"
    target = Path(path1).absolute().parts
    base = Path(path2).absolute().parts
    # Count how many leading components the two absolute paths share
    shared = 0
    for a,b in zip(target, base):
        if a != b: break
        shared += 1
    ups = ['..'] * (len(base) - shared)
    return os.path.sep.join(ups + list(target[shared:]))

In [ ]:

#hide
# Seen from the doc folder the image is one level up; seen from its parent it is directly below
test_eq(_relative_to(Path('images/logo.png'), Config().doc_path), '../nbs/images/logo.png')
test_eq(_relative_to(Path('images/logo.png'), Config().doc_path.parent), 'nbs/images/logo.png')

In [ ]:

#export
def adapt_img_path(cell, fname, dest, jekyll=True):
    "Rewrite the image paths found in `cell` (taken from notebook `fname`) so they resolve from `dest`"
    def _is_remote(src): return src.startswith('http:/') or src.startswith('https:/')
    def _rep(m):
        start,img,end,tag = m.groups()
        if start is None:
            # `<img ...>` form: fix up `src`, then render the whole tag again
            h = HTMLParseAttrs()
            dic = h(tag)
            if not _is_remote(dic['src']): dic['src'] = _relative_to(fname.parent/dic['src'], dest)
            return _img2jkl(dic, h, jekyll=jekyll)
        # `![alt](path)` form: only the path part changes
        if not _is_remote(img): img = _relative_to(fname.parent/img, dest)
        return f'{start}{img}{end}'
    if cell['cell_type'] != 'markdown': return cell
    cell['source'] = _re_image.sub(_rep, cell['source'])
    return cell

This function is slightly different as it ensures that a notebook converted to a file that will be placed in dest will have its image locations updated. It is used for the README.md file (generated automatically from the index) since the images are copied inside the github repo, but in general, you should make sure your images are going to be accessible from the location your file ends up being.

In [ ]:

# Local markdown image: the path is rewritten relative to the destination folder
cell = {'cell_type': 'markdown', 'source': 'Text\n![Alt](images/logo.png)'}
cell1 = adapt_img_path(cell, Path('01_export.ipynb'), Path('.').absolute().parent)
test_eq(cell1['source'], 'Text\n![Alt](nbs/images/logo.png)')

# Remote markdown image: left as it is
cell = {'cell_type': 'markdown', 'source': 'Text\n![Alt](http://site.logo.png)'}
cell1 = adapt_img_path(cell, Path('01_export.ipynb'), Path('.').absolute().parent)
test_eq(cell1['source'], 'Text\n![Alt](http://site.logo.png)')

# Local `<img>` tag: rewritten path, rendered as a jekyll include
cell = {'cell_type': 'markdown', 
        'source': 'Text\n<img alt="Logo" src="images/logo.png" width="600"/>'}
cell1 = adapt_img_path(cell, Path('01_export.ipynb'), Path('.').absolute().parent)
test_eq(cell1['source'], 'Text\n{% include image.html alt="Logo" max-width="600" file="nbs/images/logo.png" %}')

# Remote `<img>` tag: the url is kept, the tag is still rendered as an include
cell = {'cell_type': 'markdown', 
        'source': 'Text\n<img alt="Logo" src="https://site.image.png" width="600"/>'}
cell1 = adapt_img_path(cell, Path('01_export.ipynb'), Path('.').absolute().parent)
test_eq(cell1['source'], 'Text\n{% include image.html alt="Logo" max-width="600" file="https://site.image.png" %}')

In [ ]:

#export
#Matches any cell with #hide or #default_exp or #default_cls_lvl
# No re.MULTILINE: the flag has to open the cell (after optional whitespace) and be followed by whitespace
_re_cell_to_remove = re.compile(r'^\s*#\s*(hide|default_exp|default_cls_lvl)\s+')

Collapsable Code Cells¶

In [ ]:

#export
#Matches any cell with #collapse or #collapse_hide
# (the dashed spelling is accepted too; the flag must be followed by whitespace)
_re_cell_to_collapse_closed = re.compile(r'^\s*#\s*(collapse|collapse_hide|collapse-hide)\s+')

#Matches any cell with #collapse_show
# (the dashed spelling is accepted too; the flag must be followed by whitespace)
_re_cell_to_collapse_open = re.compile(r'^\s*#\s*(collapse_show|collapse-show)\s+')

In [ ]:

#export
def collapse_cells(cell):
    "Mark `cell` as collapsible, closed or open depending on which flag it carries"
    # The closed flag is checked first; in both cases the cell's metadata is replaced, not merged
    if check_re(cell, _re_cell_to_collapse_closed):
        cell['metadata'] = {'collapse_hide': True}
        return cell
    if check_re(cell, _re_cell_to_collapse_open):
        cell['metadata'] = {'collapse_show': True}
    return cell

Placing #collapse_show as a comment in a code cell will include your code under a collapsible element that is open by default.

In [ ]:

#collapse_show
# The flag above has to be one that `_re_cell_to_collapse_open` accepts (`collapse_show` or `collapse-show`)
print('This code cell is not collapsed by default but you can collapse it to hide it from view!')
print("Note that the output always shows with `#collapse`.")

This code cell is not collapsed by default but you can collapse it to hide it from view!
Note that the output always shows with `#collapse`.

Placing #collapse or #collapse_hide will include your code in a collapsible element that is closed by default. For example:

In [ ]:

#collapse
# Demo of a cell whose input starts out collapsed
print('The code cell that produced this output is collapsed by default but you can expand it!')

The code cell that produced this output is collapsed by default but you can expand it!

Preprocessing the list of cells¶

The following functions are applied to the entire list of cells of the notebook as a preprocessing step before the conversion to html.

In [ ]:

#export
def remove_hidden(cells):
    "Keep only the `cells` that do not start with a `#hide`, `#default_exp` or `#default_cls_lvl` flag"
    kept = []
    for c in cells:
        if _re_cell_to_remove.search(c['source']) is not None: continue
        kept.append(c)
    return kept

In [ ]:

# Six code cells and two markdown cells; four of the eight carry a flag that removes them
cells = [{'cell_type': 'code', 'source': source} for source in [
    '# export\nfrom local.core import *', 
    '# hide\nfrom local.core import *',
    '#exports\nsuper code',
    '#default_exp notebook.export',
    'show_doc(read_nb)',
    '#default_cls_lvl 3']] + [{'cell_type': 'markdown', 'source': source} for source in [
    'nice', '#hide\n\nto hide']]
         
cells1 = remove_hidden(cells)
test_eq(len(cells1), 4)
test_eq(cells1[0], cells[0])
test_eq(cells1[1], cells[2])
test_eq(cells1[2], cells[4])
test_eq(cells1[3], cells[6])

In [ ]:

#export
# Group 1 holds the digits; being `\d*`, it is an empty string when the flag carries no number
_re_default_cls_lvl = re.compile(r"""
^               # Beginning of line (since re.MULTILINE is passed)
\s*\#\s*        # Any number of whitespace, #, any number of whitespace
default_cls_lvl # default_cls_lvl
\s*             # Any number of whitespace
(\d*)           # Catching group for any number of digits
\s*$            # Any number of whitespace and end of line (since re.MULTILINE is passed)
""", re.IGNORECASE | re.MULTILINE | re.VERBOSE)

In [ ]:

# export
def find_default_level(cells):
    "Find in `cells` the default class level (2 when no cell sets one)."
    for cell in cells:
        tst = check_re(cell, _re_default_cls_lvl)
        if not tst: continue
        digits = tst.groups()[0]
        # The pattern allows a flag without a number; skip it rather than crash on `int('')`
        if digits: return int(digits)
    return 2

In [ ]:

# `00_export.ipynb` is expected to declare a default class level of 3
tst_nb = read_nb('00_export.ipynb')
test_eq(find_default_level(tst_nb['cells']), 3)

In [ ]:

#export
#Find a cell with #export(s)
_re_export = re.compile(r'^\s*#\s*exports?\s*', re.IGNORECASE | re.MULTILINE)
# Group 1 of `_re_show_doc` is the first argument passed to the call
_re_show_doc = re.compile(r"""
# First one catches any cell with a #export or #exports, second one catches any show_doc and get the first argument in group 1
show_doc     # show_doc
\s*\(\s*     # Any number of whitespace, opening (, any number of whitespace
([^,\)\s]*)  # Catching group for any character but a comma, a closing ) or a whitespace
[,\)\s]      # A comma, a closing ) or a whitespace
""", re.MULTILINE | re.VERBOSE)

In [ ]:

#export
def _show_doc_cell(name, cls_lvl=None):
    "Build an empty code cell that calls `show_doc` on `name`, passing `cls_lvl` along when given"
    extra = '' if cls_lvl is None else f', default_cls_level={cls_lvl}'
    cell = {'cell_type': 'code', 'execution_count': None, 'metadata': {}, 'outputs': []}
    cell['source'] = f"show_doc({name}{extra})"
    return cell

def add_show_docs(cells, cls_lvl=None):
    "Add `show_doc` for each exported function or class"
    # Names that already have a hand-written doc cell somewhere in `cells`
    documented = []
    for cell in cells:
        if cell['cell_type'] != 'code': continue
        found = _re_show_doc.search(cell['source'])
        if found is not None: documented.append(found.groups()[0])
    res = []
    for cell in cells:
        res.append(cell)
        if not check_re(cell, _re_export): continue
        # New doc cells go right after the cell that defines the names
        missing = [n for n in export_names(cell['source'], func_only=True) if n not in documented]
        res.extend(_show_doc_cell(n, cls_lvl=cls_lvl) for n in missing)
    return res

This only adds cells with a show_doc for non-documented functions, so if you add yourself a show_doc cell (because you want to change one of the default argument), there won't be any duplicates.

In [ ]:

# Locate the cell that defines `read_nb`, then work on it together with the cell before it
for i,cell in enumerate(tst_nb['cells']):
    if cell['source'].startswith('#export\ndef read_nb'): break
tst_cells = [c.copy() for c in tst_nb['cells'][i-1:i+1]]
added_cells = add_show_docs(tst_cells, cls_lvl=3)
# One doc cell is expected to be appended after the two original cells
test_eq(len(added_cells), 3)
test_eq(added_cells[0], tst_nb['cells'][i-1])
test_eq(added_cells[1], tst_nb['cells'][i])
test_eq(added_cells[2], _show_doc_cell('read_nb', cls_lvl=3))
test_eq(added_cells[2]['source'], 'show_doc(read_nb, default_cls_level=3)')

#Check show_doc isn't added if it was already there.
tst_cells1 = [{'cell_type':'code', 'source': '#export\ndef my_func(x):\n    return x'},
              {'cell_type':'code', 'source': 'show_doc(my_func)'}]
test_eq(add_show_docs(tst_cells1), tst_cells1)
# Same check when the existing doc cell is further away and passes extra arguments
tst_cells1 = [{'cell_type':'code', 'source': '#export\ndef my_func(x):\n    return x'},
              {'cell_type':'markdown', 'source': 'Some text'},
              {'cell_type':'code', 'source': 'show_doc(my_func, title_level=3)'}]
test_eq(add_show_docs(tst_cells1), tst_cells1)

In [ ]:

#export 
# No re.MULTILINE: `$` only matches at the end of the text (or just before a final newline),
# so only a header that closes the cell can match
_re_fake_header = re.compile(r"""
# Matches any fake header (one that ends with -)
\#+    # One or more #
\s+    # One or more of whitespace
.*     # Any char
-\s*   # A dash followed by any number of white space
$      # End of text
""", re.VERBOSE)

In [ ]:

# export
def remove_fake_headers(cells):
    "Drop from `cells` the non-code cells that hold a fake header"
    def _keep(c):
        # Code cells are never inspected
        if c['cell_type']=='code': return True
        return _re_fake_header.search(c['source']) is None
    return [c for c in cells if _keep(c)]

You can fake headers in your notebook to navigate them more easily with collapsible headers, just make them finish with a dash and they will be removed. One typical use case is to have a header of level 2 with the name of a class: since the show_doc cell of that class will create the same anchor, you need to have the one you created manually disappear to avoid any duplicate.

In [ ]:

# Put a fake header in front of real notebook cells: only that one cell should be dropped
cells = [{'cell_type': 'markdown',
          'metadata': {},
          'source': '### Fake-'}] + tst_nb['cells'][:10]
cells1 = remove_fake_headers(cells)
test_eq(len(cells1), len(cells)-1)
test_eq(cells1[0], cells[1])

In [ ]:

# export
def remove_empty(cells):
    "Drop from `cells` those whose source has zero length"
    non_empty = []
    for c in cells:
        if len(c['source']) == 0: continue
        non_empty.append(c)
    return non_empty

Grabbing metadata¶

In [ ]:

#export 
# No re.MULTILINE here: the `# Title` line has to come first in the text (after optional whitespace)
_re_title_summary = re.compile(r"""
# Catches the title and summary of the notebook, presented as # Title > summary, with title in group 1 and summary in group 2
^\s*       # Beginning of text followe by any number of whitespace
\#\s+      # # followed by one or more of whitespace
([^\n]*)   # Catching group for any character except a new line
\n+        # One or more new lines
>[ ]*       # > followed by any number of whitespace
([^\n]*)   # Catching group for any character except a new line
""", re.VERBOSE)

# One match per `- key: value` line, with the key in group 1 and the value in group 2
_re_properties = re.compile(r"""
^-\s+      # Beginnig of a line followed by - and at least one space
(.*?)      # Any pattern (shortest possible)
\s*:\s*    # Any number of whitespace, :, any number of whitespace
(.*?)$     # Any pattern (shortest possible) then end of line
""", re.MULTILINE | re.VERBOSE)

In [ ]:

# export
def get_metadata(cells):
    "Pop the title/summary cell out of `cells` and return its metadata (defaults if there is none)."
    for idx,cell in enumerate(cells):
        if cell['cell_type'] != 'markdown': continue
        found = _re_title_summary.match(cell['source'])
        if not found: continue
        # The title cell is taken out of the list, so it is not part of what gets converted later
        cells.pop(idx)
        title,summary = found.groups()
        meta = {'keywords': 'fastai', 'summary': summary, 'title': title}
        # `- key: value` items of the same cell are added last and win over the three keys above
        meta.update(_re_properties.findall(cell['source']))
        return meta

    fallback = {'keywords': 'fastai', 'summary': 'summary', 'title': 'Title'}
    return fallback

In the markdown cell with the title, you can add the summary as a block quote (just put an empty block quote for an empty summary) and a list with any additional metadata you would like to add, for instance:

# Title

> Awesome summary
- toc: False

The toc: False metadata will prevent the table of contents from showing on the page.

In [ ]:

# Title and summary are read from the first matching markdown cell of the notebook
tst_nb = read_nb('00_export.ipynb')
test_eq(get_metadata(tst_nb['cells']), {
    'keywords': 'fastai',
    'summary': 'The functions that transform notebooks in a library',
    'title': 'Export to modules'})

#The cell with the metadata is popped out, so if we do it a second time we get the default.
test_eq(get_metadata(tst_nb['cells']), {'keywords': 'fastai',
            'summary' : 'summary',
            'title'   : 'Title'})

In [ ]:

#hide
# A `- key: value` list item ends up in the metadata, with its value kept as a string
cells = [{'cell_type': 'markdown', 'source': "# Title\n\n> s\n\n- toc: false"}]
test_eq(get_metadata(cells), {'keywords': 'fastai', 'summary': 's', 'title': 'Title', 'toc': 'false'})

Executing show_doc cells¶

In [ ]:

# export
# Cells to run: a line starting with a `show_doc` call, or with an import from `LIB_NAME.`
# NOTE(review): `LIB_NAME` is presumably swapped for the configured library name by `ReLibName` - confirm
_re_cell_to_execute = ReLibName(r"^\s*show_doc\(([^\)]*)\)|^from LIB_NAME\.", re.MULTILINE)

In [ ]:

# export
class ExecuteShowDocPreprocessor(ExecutePreprocessor):
    "An `ExecutePreprocessor` that only executes `show_doc` and `import` cells"
    def preprocess_cell(self, cell, resources, index):
        "Run `cell` only if it is a code cell matching `_re_cell_to_execute`; pass anything else through"
        is_code = 'source' in cell and cell['cell_type'] == "code"
        if not is_code or not _re_cell_to_execute.re.search(cell['source']):
            return cell, resources
        return super().preprocess_cell(cell, resources, index)

In [ ]:

# export
def _import_show_doc_cell(mod=None):
    "Build a hidden-input code cell importing `show_doc`, plus everything from module `mod` if given."
    lines = ["#export", "from nbdev.showdoc import show_doc"]
    if mod: lines.append(f"from {Config().lib_name}.{mod} import *")
    cell = dict(cell_type='code', execution_count=None)
    cell['metadata'] = {'hide_input': True}
    cell['outputs'] = []
    cell['source'] = "\n".join(lines)
    return cell

def execute_nb(nb, mod=None, metadata=None, show_doc_only=True):
    "Run `nb` (all of it, or just the cells `ExecuteShowDocPreprocessor` picks) and return the executed notebook"
    # The import cell is inserted into `nb['cells']` itself, so the caller's notebook gains a first cell
    import_cell = _import_show_doc_cell(mod)
    nb['cells'].insert(0, import_cell)
    executor_cls = ExecutePreprocessor if not show_doc_only else ExecuteShowDocPreprocessor
    executor = executor_cls(timeout=600, kernel_name='python3')
    if not metadata: metadata = {}
    pnb = nbformat.from_dict(nb)
    executor.preprocess(pnb, metadata)
    return pnb

It's important to execute all show_doc cells before exporting the notebook to html because some of them have just been added automatically or others could have outdated links.

In [ ]:

# Small notebook made of the first test cell plus the cells built above; after execution the
# last cell (the added doc cell) must have some output
fake_nb = {k:v for k,v in tst_nb.items() if k != 'cells'}
fake_nb['cells'] = [tst_nb['cells'][0].copy()] + added_cells
fake_nb = execute_nb(fake_nb, mod='export')
assert len(fake_nb['cells'][-1]['outputs']) > 0

Filling templates¶

The following functions automatically add jekyll templates if they are missing.

In [ ]:

#export
def write_tmpl(tmpl, nms, cfg, dest):
    "Fill `tmpl` with the keys listed in `nms` (space separated), read from `cfg.d`, and write it to `dest` unless that file exists"
    if not dest.exists():
        values = {}
        for key in nms.split(): values[key] = cfg.d[key]
        dest.write_text(tmpl.format(**values))

In [ ]:

#export
def write_tmpls():
    "Write out _config.yml, _data/topnav.yml and the Makefile from their templates (existing files are kept)"
    cfg = Config()
    # One row per file: template, config keys it needs, destination
    jobs = [
        (config_tmpl,   'user lib_name title copyright description', cfg.doc_path/'_config.yml'),
        (topnav_tmpl,   'user lib_name',                             cfg.doc_path/'_data'/'topnav.yml'),
        (makefile_tmpl, 'nbs_path lib_name',                         cfg.config_file.parent/'Makefile'),
    ]
    for tmpl,names,dest in jobs: write_tmpl(tmpl, names, cfg, dest)

Conversion¶

In [ ]:

# Set `__file__` by hand in the notebook, pointing at the generated module: `nbdev_exporter`
# looks for the `templates` folder next to it
__file__ = Config().lib_path/'export2html.py'

In [ ]:

# export
def nbdev_exporter(cls=HTMLExporter, template_file=None):
    "Create an exporter of type `cls` with prompts excluded, using `template_file` ('jekyll.tpl' by default)"
    exporter = cls(traitlets.config.Config())
    exporter.exclude_input_prompt = exporter.exclude_output_prompt = True
    if template_file is None: template_file = 'jekyll.tpl'
    exporter.template_file = template_file
    # Templates are looked up in the `templates` folder next to this module
    templates_dir = Path(__file__).parent/'templates'
    exporter.template_path.append(str(templates_dir))
    return exporter

In [ ]:

# export
# Steps run on the whole list of cells, then steps run on each cell individually
# (both lists are composed in `convert_nb` and `convert_md`)
process_cells = [remove_fake_headers, remove_hidden, remove_empty]
process_cell  = [hide_cells, collapse_cells, remove_widget_state, add_jekyll_notes]

In [ ]:

# export
# Leading digits, then as few non-whitespace characters as possible up to an underscore (e.g. `00a_`)
_re_digits = re.compile(r'^\d+\S*?_')

In [ ]:

#export
def _nb2htmlfname(nb_path, dest=None):
    "Path of the html file for notebook `nb_path` inside `dest` (the doc folder by default)"
    out_dir = Config().doc_path if dest is None else dest
    html_name = nb_path.with_suffix('.html').name
    # The numeric prefix matched by `_re_digits` is not part of the html name
    return Path(out_dir)/_re_digits.sub('', html_name)

In [ ]:

#hide
# The numeric prefix is stripped, later underscores and digits are kept, and `dest` replaces the doc folder
test_eq(_nb2htmlfname(Path('00a_export.ipynb')), Config().doc_path/'export.html')
test_eq(_nb2htmlfname(Path('export.ipynb')), Config().doc_path/'export.html')
test_eq(_nb2htmlfname(Path('00ab_export_module_1.ipynb')), Config().doc_path/'export_module_1.html')
test_eq(_nb2htmlfname(Path('export.ipynb'), '.'), Path('export.html'))

In [ ]:

# export
def convert_nb(fname, cls=HTMLExporter, template_file=None, exporter=None, dest=None):
    "Convert a notebook `fname` to html file in `dest` (the doc folder when `dest` is None)."
    fname = Path(fname).absolute()
    nb = read_nb(fname)
    meta_jekyll = get_metadata(nb['cells'])
    # Same fallback as in `convert_md`: a notebook outside the project keeps its absolute path
    try: meta_jekyll['nb_path'] = str(fname.relative_to(Config().lib_path.parent))
    except ValueError: meta_jekyll['nb_path'] = str(fname)
    cls_lvl = find_default_level(nb['cells'])
    mod = find_default_export(nb['cells'])
    nb['cells'] = compose(*process_cells,partial(add_show_docs, cls_lvl=cls_lvl))(nb['cells'])
    nb['cells'] = [compose(partial(copy_images, fname=fname, dest=Config().doc_path), *process_cell, treat_backticks)(c)
                    for c in nb['cells']]
    nb = execute_nb(nb, mod=mod)
    nb['cells'] = [clean_exports(c) for c in nb['cells']]
    if exporter is None: exporter = nbdev_exporter(cls=cls, template_file=template_file)
    # Render before opening the output: opening first truncates the file, so a failed conversion
    # would leave an empty but fresh html file that `notebook2html` then sees as up to date
    html = exporter.from_notebook_node(nb, resources=meta_jekyll)[0]
    with open(_nb2htmlfname(fname, dest=dest), 'w', encoding='utf-8') as f:
        f.write(html)

In [ ]:

# export
def _notebook2html(fname, cls=HTMLExporter, template_file=None, exporter=None, dest=None):
    "Convert `fname` with `convert_nb`; print the error and return False if it fails, True otherwise"
    # Random pause of less than a second before starting
    time.sleep(random.random())
    print(f"converting: {fname}")
    try: convert_nb(fname, cls=cls, template_file=template_file, exporter=exporter, dest=dest)
    except Exception as err:
        print(err)
        return False
    return True

In [ ]:

# export
def notebook2html(fname=None, force_all=False, n_workers=None, cls=HTMLExporter, template_file=None, exporter=None, dest=None):
    "Convert all notebooks matching `fname` to html files"
    if fname is not None:
        pattern = Path(fname)
        files = list(pattern.parent.glob(pattern.name))
    else:
        files = [f for f in Config().nbs_path.glob('*.ipynb') if not f.name.startswith('_')]
    # A single notebook is always converted, and without workers unless the caller asked for some
    if len(files)==1:
        force_all = True
        if n_workers is None: n_workers=0
    if not force_all:
        # only rebuild modified files
        def _is_stale(nb_file):
            html_file = _nb2htmlfname(Path(nb_file).absolute(), dest=dest)
            return not html_file.exists() or os.path.getmtime(nb_file) >= os.path.getmtime(html_file)
        files = [f for f in files if _is_stale(f)]
    if len(files)==0:
        print("No notebooks were modified")
        return
    passed = parallel(_notebook2html, files, n_workers=n_workers, cls=cls, template_file=template_file, exporter=exporter, dest=dest)
    if all(passed): return
    failed = [f.name for p,f in zip(passed,files) if not p]
    raise Exception("Conversion failed on the following:\n" + '\n'.join(failed))

In [ ]:

#hide

# Test when an argument is given to notebook2html
p1 = Path('/tmp/sync.html')
if p1.exists(): p1.unlink()
notebook2html('01_sync.ipynb', dest='/tmp');
assert p1.exists()

# Test when no argument is given to notebook2html
# (stale outputs are deleted first so that every notebook has to be converted again)
dest_files = [_nb2htmlfname(f, dest='/tmp') for f in Config().nbs_path.glob('*.ipynb') if not f.name.startswith('_')]
[f.unlink() for f in dest_files if f.exists()]
notebook2html(fname=None, dest='/tmp');
assert all([f.exists() for f in dest_files])


# Test Error handling
# (a markdown file is not a notebook: the conversion fails and notebook2html has to raise)
try: notebook2html('../README.md');
except Exception as e: assert True
else: assert False, 'An error should be raised when a non-notebook file is passed to notebook2html!'

converting: 01_sync.ipynb
converting: /home/sgugger/git/nbdev/nbs/02_showdoc.ipynb
converting: /home/sgugger/git/nbdev/nbs/06_cli.ipynb
converting: /home/sgugger/git/nbdev/nbs/07_clean.ipynb
converting: /home/sgugger/git/nbdev/nbs/tutorial.ipynb
converting: /home/sgugger/git/nbdev/nbs/99_search.ipynb
converting: /home/sgugger/git/nbdev/nbs/04_test.ipynb
converting: /home/sgugger/git/nbdev/nbs/03_export2html.ipynb
converting: /home/sgugger/git/nbdev/nbs/05_merge.ipynb
converting: /home/sgugger/git/nbdev/nbs/index.ipynb
converting: /home/sgugger/git/nbdev/nbs/01_sync.ipynb
converting: /home/sgugger/git/nbdev/nbs/00_export.ipynb
converting: ../README.md
Notebook does not appear to be JSON: '![](https://github.com/fastai/nbdev/wor...

Hides cells starting with #export and only leaves the prose and the tests. If fname is not specified, this will convert all notebooks not beginning with an underscore in the notebook folder (nbs_path) defined in settings.ini. Otherwise fname can be a single filename or a glob expression.

By default, only the notebooks that are more recent than their html counterparts are modified, pass force_all=True to change that behavior.

In [ ]:

#hide
#notebook2html(force_all=True)

In [ ]:

# export
def convert_md(fname, dest_path, img_path='docs/images/', jekyll=True):
    "Convert a notebook `fname` to a markdown file in `dest_path`, writing extracted outputs under `dest_path/img_path`."
    fname = Path(fname).absolute()
    # An empty `img_path` falls back to a folder named after the notebook
    if not img_path: img_path = fname.stem + '_files/'
    # Kept for backward compatibility: a relative `img_path` is created against the current working directory here
    Path(img_path).mkdir(exist_ok=True, parents=True)
    nb = read_nb(fname)
    meta_jekyll = get_metadata(nb['cells'])
    # Store the notebook path relative to the parent of `lib_path` when possible, the absolute path otherwise
    try: meta_jekyll['nb_path'] = str(fname.relative_to(Config().lib_path.parent))
    except Exception: meta_jekyll['nb_path'] = str(fname)
    nb['cells'] = compose(*process_cells)(nb['cells'])
    nb['cells'] = [compose(partial(adapt_img_path, fname=fname, dest=dest_path, jekyll=jekyll), *process_cell)(c)
                   for c in nb['cells']]
    dest_name = fname.with_suffix('.md').name
    exp = nbdev_exporter(cls=MarkdownExporter, template_file='jekyll-md.tpl' if jekyll else 'md.tpl')
    export = exp.from_notebook_node(nb, resources=meta_jekyll)
    md = export[0]
    # Prefix the target of every `![png](...)` / `![svg](...)` reference with `img_path`.
    # A function replacement is used so that `img_path` is inserted verbatim (a backslash in it
    # would otherwise be read as an escape by the replacement template).
    for ext in ['png', 'svg']:
        md = re.sub(r'!\[' + ext + r'\]\((.+)\)', lambda m: f'![{ext}]({img_path}{m.group(1)})', md)
    with (Path(dest_path)/dest_name).open('w') as f: f.write(md)
    outputs = export[1]['outputs']
    if outputs:
        # The outputs go under `dest_path`, which is not necessarily where `img_path` was created above
        out_dir = Path(dest_path)/img_path
        out_dir.mkdir(exist_ok=True, parents=True)
        for n,o in outputs.items():
            with open(out_dir/n, 'wb') as f: f.write(o)

This is used to convert the index into the README.md.

In [ ]:

#hide
# Convert the index into the parent folder, then always remove the generated markdown file
try:
    convert_md('index.ipynb', Path('.').absolute().parent, jekyll=False)
finally:
    generated_md = Path('.').absolute().parent/'index.md'
    generated_md.unlink()

In [ ]:

#export
# Matches a markdown image pointing at the notebook attachment `image.png` (optionally indented):
# group 1 is the alt text, group 2 the optional double-quoted title.
# The dot is escaped so that only a literal `image.png` is accepted.
_re_att_ref = re.compile(r' *!\[(.*)\]\(attachment:image\.png(?: "(.*)")?\)')

In [ ]:

# Each attachment reference is paired with the `(alt, title)` groups the regex must capture
_att_cases = [
    ('![screenshot](attachment:image.png)', ('screenshot', None)),
    ('![screenshot](attachment:image.png "Deploying to Binder")', ('screenshot', "Deploying to Binder")),
]
for t,expected in _att_cases:
    test_eq(_re_att_ref.match(t).groups(), expected)

In [ ]:

#export
try: from PIL import Image
except: pass # Only required for _update_att_ref

In [ ]:

#export
_tmpl_img = '<img alt="{title}" width="{width}" caption="{title}" id="{id}" src="{name}">'

def _update_att_ref(line, path, img):
    "Turn an attachment reference in `line` into an `<img>` tag whose source is `path`; other lines are returned as is"
    found = _re_att_ref.match(line)
    if found is None: return line
    alt, title = found.groups()
    # Images whose alt text is exactly 'screenshot' get half of their pixel width
    full_width = img.size[0]
    width = full_width//2 if alt=='screenshot' else full_width
    return _tmpl_img.format(title=title or "TK: add title", width=str(width), id='TK: add it', name=str(path))

In [ ]:

#export
def _nb_detach_cell(cell, dest, use_img):
    "Save the first attachment of `cell` as a new file in `dest` and return the source lines of the cell updated to reference it"
    attachments, lines = cell['attachments'], cell['source']
    mime_type, payload = first(first(attachments.values()).items())
    suffix = mime_type.split('/')[1]
    # Use the first `att_NNNNN.<suffix>` name that is still free in `dest`
    for idx in range(99999):
        target = dest/(f'att_{idx:05d}.{suffix}')
        if not target.exists(): break
    target.write_bytes(b64decode(payload))
    del(cell['attachments'])
    if not use_img:
        return [ln.replace('attachment:image.png', str(target)) for ln in lines]
    return [_update_att_ref(ln, target, Image.open(target)) for ln in lines]

In [ ]:

#export
def nb_detach_cells(path_nb, dest=None, replace=True, use_img=False):
    "Export cell attachments to `dest` and update references"
    path_nb = Path(path_nb)
    # Default destination: a `<notebook stem>_files` folder (relative to the current working directory)
    if not dest: dest = f'{path_nb.stem}_files'
    dest = Path(dest)
    dest.mkdir(exist_ok=True, parents=True)
    # Context managers make sure the notebook file is closed (and flushed on write)
    with path_nb.open(encoding='utf-8') as f: j = json.load(f)
    atts = [o for o in j['cells'] if 'attachments' in o]
    for o in atts: o['source'] = _nb_detach_cell(o, dest, use_img)
    # With `replace=False` the notebook on disk is left alone and the updated json is returned
    if not replace: return j
    # Only rewrite the notebook when at least one cell was changed
    if atts:
        with path_nb.open('w', encoding='utf-8') as f: json.dump(j, f)

In [ ]:

#export
import time,random,warnings

In [ ]:

#export
def _leaf(k,v):
    "Sidebar entry titled `k` linking to `v`: an `external_url` for http(s) links, a site `url` otherwise"
    # Only a real http(s) scheme marks a link as external; a plain substring test
    # would also flag internal pages such as `/http_utils`
    url = 'external_url' if v.startswith(('http://', 'https://')) else 'url'
    return {'title':k, url:v, 'output':'web,pdf'}

In [ ]:

#export
_k_names = ['folders', 'folderitems', 'subfolders', 'subfolderitems']
def _side_dict(title, data, level=0):
    "Recursively convert the nested dict `data` into sidebar entries, using the key name that matches the nesting `level`"
    key = _k_names[level]
    children = []
    for name,val in data.items():
        # Nested dicts become sub-levels, anything else is a link
        if isinstance(val, dict): children.append(_side_dict(name, val, level+1))
        else: children.append(_leaf(name, val))
    if not title: return {key: children}
    # Titles starting with 'empty' contribute their children without any wrapping entry
    if title.startswith('empty'): return children
    return {'title': title, 'output':'web', key: children}

In [ ]:

#export
# Captures the value of a `title: ...` line; `re.MULTILINE` lets `^`/`$` anchor on every line of the text.
# Raw string, so that `\s` and `\S` reach the regex engine instead of being invalid string escapes.
_re_catch_title = re.compile(r'^title\s*:\s*(\S+.*)$', re.MULTILINE)

In [ ]:

#export
def _get_title(fname):
    "Return the `title:` value found in html file `fname`, or the stem of `fname` when there is none"
    with open(fname, 'r') as f: found = _re_catch_title.search(f.read())
    if found is None: return fname.stem
    return found.group(1)

In [ ]:

#hide
# `export.html` in the docs folder must yield this title
export_page = Config().doc_path/'export.html'
test_eq(_get_title(export_page), "Export to modules")

In [ ]:

#export
def create_default_sidebar():
    "Create the default sidebar for the docs website"
    dic = {"Overview": "/"}
    # Notebooks whose name starts with an underscore are left out
    files = [f for f in Config().nbs_path.glob('*.ipynb') if not f.name.startswith('_')]
    fnames = [_nb2htmlfname(f) for f in sorted(files)]
    # Read each title once; the index page is skipped because it is already listed as "Overview"
    pages = [(_get_title(f), f.stem) for f in fnames if f.stem!='index']
    titles = [title for title,_ in pages]
    # Duplicate titles would silently overwrite each other in the dict below, so warn about them
    if len(titles) > len(set(titles)): print(f"Warning: Some of your Notebooks use the same title ({titles}).")
    dic.update({title: f'/{stem}' for title,stem in pages})
    dic = {Config().lib_name: dic}
    with open(Config().doc_path/'sidebar.json', 'w') as f: json.dump(dic, f, indent=2)

The default sidebar lists all html pages with their respective title, except the index that is named "Overview". To build a custom sidebar, set the flag custom_sidebar in your settings.ini to True then change the sidebar.json file in the doc_folder to your liking. Otherwise, the sidebar is updated at each doc build.

In [ ]:

#hide
#create_default_sidebar()

In [ ]:

#export
def make_sidebar():
    "Making sidebar for the doc website from the content of `doc_folder/sidebar.json`"
    # (Re)create the default sidebar when the json file is missing or `custom_sidebar` is the string 'False'
    if not (Config().doc_path/'sidebar.json').exists() or Config().custom_sidebar == 'False': create_default_sidebar()
    with open(Config().doc_path/'sidebar.json', 'r') as f: sidebar_d = json.load(f)
    res = _side_dict('Sidebar', sidebar_d)
    res = {'entries': [res]}
    res_s = yaml.dump(res, default_flow_style=False)
    # Textual fix-ups of the dumped YAML for nested lists
    # NOTE(review): presumably this matches the layout the sidebar template expects - confirm
    res_s = res_s.replace('- subfolders:', '  subfolders:').replace(' - - ', '   - ')
    res_s = f"""
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# Instead edit {'../../sidebar.json'}
"""+res_s
    with open(Config().doc_path/'_data/sidebars/home_sidebar.yml', 'w') as f: f.write(res_s)

Export-¶

In [ ]:

#hide
notebook2script()

Converted 00_export.ipynb.
Converted 01_sync.ipynb.
Converted 02_showdoc.ipynb.
Converted 03_export2html.ipynb.
Converted 04_test.ipynb.
Converted 05_merge.ipynb.
Converted 06_cli.ipynb.
Converted 07_clean.ipynb.
Converted 99_search.ipynb.
Converted index.ipynb.
Converted tutorial.ipynb.

In [ ]:

Convert to html¶

Preprocessing notebook¶

Cell processors¶

Collapsable Code Cells¶

Preprocessing the list of cells¶

Grabbing metadata¶

Executing show_doc cells¶

Filling templates¶

Conversion¶

Sidebar¶

Export-¶