import importnb
from pathlib import Path
from pandas import *
import mimetypes
from toolz.curried import *
from poser import *
from toolz.curried import *
# Register the extensions this module groups files by, so that
# ``mimetypes.guess_type`` returns these types for notebook and Markdown files.
mimetypes.add_type(type='application/x-ipynb+json', ext='.ipynb')
mimetypes.add_type(type='text/markdown', ext='.md')
def files_by_type():
    """Tabulate, by guessed MIME type, the files that sit beside the ``importnb`` package.

    The search root is the directory that contains the imported ``importnb``
    package directory; it is walked recursively.  Only regular files are kept,
    and any path containing ``'__pycache__'`` or ``'checkpoint'`` is skipped.

    Returns:
        A DataFrame with one row per MIME type.  Each row holds the distinct
        paths of that type spread across integer-labelled columns; rows with
        fewer paths are padded with missing values.
        NOTE(review): files whose type cannot be guessed get a null key, which
        pandas' ``groupby`` drops by default -- confirm for the pandas in use.
    """
    skipped_markers = ('__pycache__', 'checkpoint')

    def _is_wanted(path):
        # Regular file whose path mentions none of the skipped markers.
        return path.is_file() and not any(
            marker in str(path) for marker in skipped_markers
        )

    root = Path(importnb.__file__).parent.parent
    contents = [path for path in root.rglob('*') if _is_wanted(path)]

    # ``guess_type`` returns ``(type, encoding)``; only the type is of interest.
    guessed = [mimetypes.guess_type(str(path))[0] for path in contents]

    # Two-column frame ('index' holds the paths) keyed by MIME type.
    by_mime = (
        Series(guessed, index=contents, name='mime')
        .to_frame()
        .reset_index()
        .set_index('mime')
    )

    # Collapse every MIME type to the list of its distinct paths, then fan
    # each list out into columns.
    distinct_as_list = compose(list, unique)
    grouped = by_mime.groupby(by_mime.index).agg(distinct_as_list)
    return grouped['index'].apply(Series)
def count_lines(str):
    """Classify each line of a block of source text and return the tallies.

    A line that is empty or whitespace-only counts as ``'empty'``; a line whose
    first non-blank character is ``#`` counts as ``'comment'``; every other
    line counts as ``'significant'``.

    Args:
        str: The text to analyse.  (The name shadows the builtin; it is kept
            because it is part of the function's signature.)

    Returns:
        A Series with the integer counts under the labels ``'significant'``,
        ``'empty'`` and ``'comment'``, in that order.
    """
    tally = {'significant': 0, 'empty': 0, 'comment': 0}
    for raw_line in str.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            kind = 'empty'
        elif stripped.startswith('#'):
            kind = 'comment'
        else:
            kind = 'significant'
        tally[kind] += 1
    return Series(tally)
import json
from pandas import concat as Concat
from IPython.utils.capture import capture_output
def nb_info(df):
    """Count the significant code lines in every notebook listed in ``df``.

    Args:
        df: A frame indexed by MIME type whose ``'application/x-ipynb+json'``
            row holds notebook paths (missing values are ignored), such as the
            one built by ``files_by_type``.

    Returns:
        A Series indexed by notebook path.  Each value is the sum, over the
        notebook's code cells, of the ``'significant'`` count reported by
        ``count_lines``; missing entries are filled with 0.

    Raises:
        KeyError: If ``df`` has no ``'application/x-ipynb+json'`` row, a
            notebook has no ``'cells'`` entry, or no notebook contains a code
            cell.
    """
    paths = df.loc['application/x-ipynb+json'].dropna()

    def _read_cells(path):
        # Notebooks are UTF-8 JSON; do not depend on the platform default
        # encoding (which is not UTF-8 on every system).
        return Series(json.loads(path.read_text(encoding='utf-8'))['cells'])

    def _cell_record(cell):
        source = cell.get('source')
        # The notebook format allows ``source`` to be a single string or a
        # list of line strings.  Joining a plain string would split it into
        # one "line" per character and wildly inflate the counts.
        text = source if isinstance(source, str) else '\n'.join(source)
        return Series({'cell_type': cell['cell_type'], 'source': text})

    # One row per cell, indexed by (cell_type, notebook path, cell position).
    cells = (
        paths.apply(_read_cells)
        .set_index(paths.values)
        .stack()
        .apply(_cell_record)
        .set_index('cell_type', append=True)
        .reorder_levels((2, 0, 1))
    )

    # Line statistics exist for code cells only; the join leaves the other
    # cell types with missing values.
    code_counts = cells.loc['code']['source'].apply(count_lines)
    per_cell = (
        cells.reset_index(0, drop=True)
        .join(code_counts)
        # Restore the three-level index that was dropped for the join.
        .set_index(cells.index)
        .unstack(-1)
    )

    # Sum across cell positions, pivot cell types into columns, keep 'code'.
    return per_cell['significant'].sum(axis=1).unstack(0).fillna(0)['code']
from IPython import get_ipython
def iterate_over_the_project(project, max=10, iter=20, **loc):
    """Record notebook code-line counts while stepping back through a git history.

    Must run inside IPython: shell commands go through
    ``get_ipython().system``, which is what the ``!command`` syntax expands
    to.  ``$name`` inside a command string is substituted by IPython from this
    function's local variables, so ``repo``, ``project`` and ``iter`` must
    keep their names.

    Args:
        project: A ``(repo, path)`` pair -- the URL to clone from and the local
            checkout directory.
        max: Number of snapshots to take.  (Shadows the builtin; the name is
            kept because it is part of the signature.)
        iter: Number of commits to step back between snapshots.  (Shadows the
            builtin; kept for the same reason.)
        **loc: Initial entries of the returned mapping.

    Returns:
        ``loc`` with one entry per snapshot: the key is the commit date
        reported by ``git log`` parsed with ``to_datetime``, the value is
        ``nb_info(files_by_type())``.

    NOTE(review): ``files_by_type`` scans the directory that contains the
    imported ``importnb`` package, not ``project``; the snapshots follow the
    checkout only when the two are the same tree -- confirm.
    """
    repo, project = project

    if not Path(project).exists():
        # Clone into ``project`` explicitly so the checkout lands where every
        # later ``cd $project`` expects it, whatever the repository is called.
        get_ipython().system('git clone $repo $project')
    get_ipython().system('cd $project && git pull')

    try:
        for _ in range(max):
            with capture_output() as captured:
                get_ipython().system(
                    'cd $project && git log -1 --format=%cd --date=local')
            # The captured text ends with a newline; strip it before parsing.
            snapshot_date = to_datetime(captured.stdout.strip())
            loc[snapshot_date] = nb_info(files_by_type())
            get_ipython().system('cd $project && git reset --hard HEAD~$iter')
    finally:
        # Bring the checkout back to the branch tip even when a snapshot
        # fails, so the working tree is never left hard-reset to an old commit.
        get_ipython().system('cd $project && git pull')
    return loc
def _test_with_importnb():
    """Smoke test: walking the importnb repository yields a non-empty result."""
    target = ('https://github.com/deathbeds/importnb', 'importnb')
    history = iterate_over_the_project(target)
    assert history
# Output captured from a notebook run of
# ``iterate_over_the_project((None, 'importnb'))``:
#
# fatal: repository 'None' does not exist
# The system cannot find the path specified.
# ---------------------------------------------------------------------------
# TypeError                                 Traceback (most recent call last)
# <ipython-input-11-77f619f20ded> in <module>()
# ----> 1 iterate_over_the_project((None, 'importnb'))
#
# <ipython-input-10-ca5d7888dc97> in iterate_over_the_project(project, **loc)
#       4 get_ipython().system('git clone $repo')
#       5 get_ipython().system('cd $project && git pull ')
# ----> 6 for i in range(max):
#       7 with capture_output() as time:
#       8 get_ipython().system('cd $project && git log -1 --format=%cd --date=local ')
#
# TypeError: 'builtin_function_or_method' object cannot be interpreted as an integer
if __name__ == '__main__':
    # Plain-Python spelling of the ``%matplotlib inline`` line magic.
    get_ipython().run_line_magic('matplotlib', 'inline')
    # ``loc`` was never bound at module level, so the plot line raised
    # NameError; collect the history here with the same call that
    # ``_test_with_importnb`` makes.
    loc = iterate_over_the_project(
        ('https://github.com/deathbeds/importnb', 'importnb'))
    # Stack the per-snapshot Series, pivot the innermost index level into
    # columns, and plot the row totals.
    Concat(loc).unstack(-1).fillna(0).sum(axis=1).plot()