A limitation of the current notebook importing systems is that we cannot view the code of imported modules. Both importnb
and The simplest path hook importer have this problem.
# Ø is True only when this file runs as the main program; the `Ø and ...`
# statements further down use it to guard demonstration code.
Ø = __name__ == '__main__'
from . import __The_simplest_path_hook_importer_for_a_notebook; from inspect import getsource
At the module level we can recover the source, and it is in fact JSON.
# Open the imported notebook module's file and check that it loads as (non-empty) JSON.
with open(__The_simplest_path_hook_importer_for_a_notebook.__file__) as file: assert __import__('json').load(file)
However, at the class or function level Python is unable to discover the source.
# Evaluated only when Ø is true: ask inspect.getsource for a function defined in the notebook module.
Ø and getsource(__The_simplest_path_hook_importer_for_a_notebook.load_ipython_extension)
This ultimately affects the interactive experience with the IPython source inspector.
# Evaluated only when Ø is true: run the same function through IPython's inspector.psource.
Ø and get_ipython().inspector.psource(__The_simplest_path_hook_importer_for_a_notebook.load_ipython_extension)
The current problem we are facing is that the `getsource` discovery doesn't have the correct regular expressions to discover source code in JSON lines. These lines are discovered in the `linecache`, which stores information about files that are loaded in as modules.
To return the source with `getsource` we must ensure that valid code is in the `linecache` rather than the raw JSON. We must also ensure that the source code lines in the original JSON file align with the lines in the `linecache`.
import linecache
`json` string to a sparse code blob
Our custom JSON decoder will record the slices of the original string where source code exists. These slices allow us to create an aligned string later.
def scanstring(s, end, strict=True, **kwargs):
    """Scan one JSON string and remember where it sat in the raw document.

    Delegates to ``py_scanstring`` and returns ``((span, text), stop)``:
    ``text`` is the decoded string, ``stop`` is the index ``py_scanstring``
    reports as the end of the string, and ``span`` is ``slice(end, stop)``
    over the raw document ``s``.
    """
    decoded, stop = py_scanstring(s, end, strict, **kwargs)
    located = slice(end, stop), decoded
    return located, stop
The `object_pairs_hook` filters the important parts of the decoded source for us. We are ultimately working with a list of pairs of slices and source code.
def object_pairs_hook(object) -> (slice, str):
    """Reduce a decoded JSON object to the part needed to rebuild source.

    ``object`` is the list of ``(key, value)`` pairs handed over by the JSON
    decoder; string values are expected to be ``(slice, text)`` pairs.

    * An object holding ``'cells'`` yields that list unchanged.
    * A non-empty ``'code'`` cell yields ``(span, source)`` where ``span``
      runs from the start of its first source string to the stop of its
      last one.
    * A non-empty ``'markdown'`` cell is treated as code whose source is
      wrapped in triple single quotes.
    * Everything else yields ``(slice(None), None)``.
    """
    node = dict(object)
    if 'cells' in node:
        return node['cells']

    if 'cell_type' in node:
        # Drop the slice and keep only the decoded cell type.
        _, node['cell_type'] = node['cell_type']

    nothing = slice(None), None
    if 'source' not in node or not node['source']:
        return nothing

    pieces = node['source']
    text = ''.join(piece[1] for piece in pieces)
    kind = node['cell_type']
    if kind == 'markdown':
        # Markdown becomes a bare string expression so it is valid Python.
        text = "'''" + text + "'''"
        kind = 'code'
    if kind != 'code':
        return nothing
    return slice(pieces[0][0].start, pieces[-1][0].stop), text
`json` decoder
This machinery supervises the decoding of a notebook on disk to source. The source file is aligned with the code by filling in non-code with whitespace. The source code of a notebook will be very sparse.
from json.scanner import py_make_scanner
from json.decoder import JSONDecoder, WHITESPACE, WHITESPACE_STR, JSONObject, py_scanstring
class LineCacheDecoder(JSONDecoder):
    """JSON decoder that turns a notebook document into line-aligned source.

    String values are scanned with ``scanstring`` so that each one carries the
    slice of the raw document it came from. ``decode`` uses those slices to
    place every source line on the same line number it occupies in the raw
    JSON, and stores the result in ``linecache``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Rebuild the pure-Python scanner after swapping ``parse_string`` so
        # that string values are routed through ``scanstring``.
        self.parse_string = scanstring
        self.scan_once = py_make_scanner(self)

    def decode(self, object, filename, transform=__import__('textwrap').dedent):
        """Decode notebook JSON into sparse source and cache it for ``filename``.

        Parameters:
            object: the raw JSON text of the notebook.
            filename: key under which the lines are stored in ``linecache``.
            transform: callable applied to each cell's source before it is
                split into lines (``textwrap.dedent`` by default).

        Returns:
            The source as one string. Lines that hold no code are blank, so a
            cell's first line sits on the line where its first source string
            starts in ``object``.
        """
        lines = []
        linecache.updatecache(filename)
        # ``updatecache`` leaves no entry when the file cannot be read (for
        # example a name that does not exist on disk). Fall back to an entry
        # with no mtime, which ``linecache.checkcache`` leaves alone.
        entry = linecache.cache.get(filename)
        if entry is not None and len(entry) == 4:
            size, mtime = entry[:2]
        else:
            size, mtime = len(object), None
        # The cache shares the ``lines`` list, so it fills up as cells are added.
        linecache.cache[filename] = size, mtime, lines, filename
        for span, cell in super().decode(object):
            if not cell:
                continue
            # Pad with blank lines up to the raw line holding the cell's first
            # source string. Counting from the start of the document keeps
            # every cell aligned, even if ``transform`` changed the line count
            # of an earlier cell. A negative amount pads nothing.
            first_line = object.count('\n', 0, span.start)
            lines.extend(['\n'] * (first_line - len(lines)))
            lines.extend(map("{}\n".format, transform(cell).splitlines()))
        return ''.join(lines)
# Module-level decoder instance whose JSON objects are reduced by object_pairs_hook.
decoder = LineCacheDecoder(object_pairs_hook=object_pairs_hook)
This decoder will update the `linecache` with valid Python source to be used for introspection.
`FlatSourceLoader` invokes the `decoder`.
from importlib.machinery import SourceFileLoader; from importlib.util import decode_source
class FlatSourceLoader(SourceFileLoader):
    """Source file loader that serves a notebook as decoded, line-aligned source."""

    def get_data(self, path):
        """Return the decoded source of the notebook at ``self.path``.

        The ``path`` argument is not consulted: every request is answered
        from ``self.path``. The raw file is decoded to text, passed through
        the module-level ``decoder`` (which also refreshes ``linecache``),
        and each cell is transformed with IPython's ``transform_cell``.
        """
        notebook_text = decode_source(super().get_data(self.path))
        cell_transform = get_ipython().input_transformer_manager.transform_cell
        return decoder.decode(notebook_text, self.path, cell_transform)

    # ``get_source`` shares the implementation; its argument is ignored too.
    get_source = get_data
def load_ipython_extension(ip=None):
    """Delegate to the simple path-hook importer's ``load_ipython_extension``.

    ``ip`` is accepted but not used here; ``None`` and ``FlatSourceLoader``
    are forwarded instead.
    NOTE(review): presumably this makes FlatSourceLoader the loader for
    notebook files -- confirm against the delegated module.
    """
    __The_simplest_path_hook_importer_for_a_notebook.load_ipython_extension(None, FlatSourceLoader)
# Evaluated only when Ø is true: call load_ipython_extension() defined in this file.
Ø and load_ipython_extension()
The new decoder and source file loader make the notebook source code inspectable.
from pytest import fixture, raises
@fixture
def module():
    """Import the ``Untitled`` module and return a freshly reloaded copy of it."""
    import importlib

    import Untitled

    return importlib.reload(Untitled)
`black` can clean up the sparse source created by the decoder.
def _is_a_valid_module(module):
    """Check that a notebook-backed module exposes real Python source.

    Asserts that the module's file is an ``.ipynb``, that the source
    ``getsource`` returns for ``module.test_thing`` and for the module itself
    parses after formatting with ``black``, and that the module source is not
    valid JSON.
    """
    assert module.__file__.endswith('.ipynb')

    import ast

    import black

    for target in (module.test_thing, module):
        formatted = black.format_str(getsource(target), 100)
        assert ast.parse(formatted)

    import json

    with raises(json.JSONDecodeError):
        json.loads(getsource(module))