#!/usr/bin/env python
# coding: utf-8

# # Counting lines of code in a git repo.

# In[1]:


# NOTE: the order of the star imports matters.  ``toolz.curried`` is imported
# last so that its curried ``map``/``filter``/``unique``/``concat``/``get``
# shadow the builtins and the pandas names of the same spelling.
import importnb
from pathlib import Path
from pandas import *
import mimetypes
from toolz.curried import *
from poser import *
from toolz.curried import *


# In[2]:


# Teach ``mimetypes`` about the two file kinds this report cares about.
mimetypes.add_type('application/x-ipynb+json', '.ipynb')
mimetypes.add_type('text/markdown', '.md')


# In[3]:


def files_by_type(root=None):
    """Group the files found under *root* by their guessed MIME type.

    Parameters
    ----------
    root : path-like, optional
        Directory that is searched recursively.  Defaults to the directory
        that contains the imported ``importnb`` package.

    Returns
    -------
    DataFrame
        One row per MIME type; the columns hold the paths of that type
        (shorter rows are padded with NaN).  Files whose type cannot be
        guessed are dropped by the group-by.
    """
    if root is None:
        root = Path(importnb.__file__).parent.parent
    ignored = ('__pycache__', 'checkpoint')
    contents = [
        path for path in Path(root).rglob('*')
        if path.is_file() and not any(part in str(path) for part in ignored)
    ]
    mimes = [mimetypes.guess_type(str(path))[0] for path in contents]
    return Series(
        mimes, index=contents, name='mime'
    ).to_frame().reset_index().set_index('mime').pipe(
        # ``unique`` is the toolz version here (see the import note above).
        lambda df: df.groupby(df.index).agg(compose(list, unique))
    )['index'].apply(Series)


# In[4]:


def count_lines(str):
    """Classify the lines of a source string.

    A line is *empty* when it only contains whitespace, a *comment* when its
    first non-blank character is ``#`` and *significant* otherwise.

    The parameter name shadows the builtin; it is kept so that existing
    keyword callers continue to work.

    Returns a Series with the entries ``significant``, ``empty`` and
    ``comment``.
    """
    significant = empty = comment = 0
    for line in str.splitlines():
        stripped = line.strip()
        if not stripped:
            empty += 1
        elif stripped.startswith('#'):
            comment += 1
        else:
            significant += 1
    return Series(
        {'significant': significant, 'empty': empty, 'comment': comment}
    )


# In[8]:


import json
import subprocess
from pandas import concat as Concat
from IPython.utils.capture import capture_output


# In[9]:


def _read_cells(path):
    """Load the notebook at *path* and return its cells as a Series of dicts."""
    # Notebook files are JSON; read them as UTF-8 rather than with the
    # platform default encoding.
    return Series(json.loads(path.read_text(encoding='utf-8'))['cells'])


def _cell_record(cell):
    """Reduce a notebook cell to its ``cell_type`` and its source text."""
    source = cell.get('source')
    # The notebook format allows ``source`` to be either one string or a list
    # of strings.  Joining a plain string would insert a newline between
    # every character, so only lists are joined.
    # NOTE(review): list items written by Jupyter usually end in a newline
    # already, so the join adds blank lines; this only affects the ``empty``
    # count, which is not part of the value returned by ``nb_info``.
    if not isinstance(source, str):
        source = '\n'.join(source)
    return Series({'cell_type': cell['cell_type'], 'source': source})


def nb_info(df):
    """Count the significant lines in the code cells of every notebook.

    Parameters
    ----------
    df : DataFrame
        The result of ``files_by_type``.  It needs a row labelled
        ``application/x-ipynb+json``; a missing row makes the ``.loc`` lookup
        fail.

    Returns
    -------
    Series
        Indexed by notebook path; each value is the number of significant
        lines (see ``count_lines``) summed over the code cells of that
        notebook.
    """
    paths = df.loc['application/x-ipynb+json'].dropna()
    # One row per cell, indexed by (cell_type, notebook path, cell number).
    notebooks = (
        paths.apply(_read_cells)
        .set_index(paths.values)
        .stack()
        .apply(_cell_record)
        .set_index('cell_type', append=True)
        .reorder_levels((2, 0, 1))
    )
    # Attach the line counts of the code cells, then spread the cell number
    # over the columns.
    notebooks = (
        notebooks.reset_index(0, drop=True)
        .join(notebooks.loc['code']['source'].apply(count_lines))
        .set_index(notebooks.index)
        .unstack(-1)
    )
    return notebooks['significant'].sum(axis=1).unstack(0).fillna(0)['code']


# In[ ]:





# In[12]:


from IPython import get_ipython


# In[13]:


def _git(project, *args, capture=False):
    """Run ``git <args>`` inside *project* and return the completed process.

    The command is passed as an argument list, so no shell is involved and
    paths containing spaces are safe.  With ``capture=True`` the standard
    output is collected as text instead of being inherited.
    """
    return subprocess.run(
        ['git'] + list(args),
        cwd=str(project),
        stdout=subprocess.PIPE if capture else None,
        universal_newlines=True,
    )


def iterate_over_the_project(project, max=10, iter=20, **loc):
    """Sample the notebook line counts while walking back through history.

    Parameters
    ----------
    project : tuple
        ``(repository url, local directory)``.  The repository is cloned into
        the directory when it does not exist yet.
    max : int
        Maximum number of samples to take.
    iter : int
        Number of commits to step back between two samples.
    **loc
        Initial entries of the returned mapping.

    Returns
    -------
    dict
        Maps the commit date of each sampled revision to the result of
        ``nb_info(files_by_type())``.

    NOTE(review): ``files_by_type`` is called without a root, so it measures
    the directory of the imported ``importnb`` package, not necessarily
    *project*.  This matches the previous behaviour; confirm that it is
    intended.
    """
    repo, project = project
    if not Path(project).exists():
        subprocess.run(['git', 'clone', repo, str(project)], check=True)
    _git(project, 'pull')
    try:
        for _ in range(max):
            stamp = _git(
                project, 'log', '-1', '--format=%cd', '--date=local',
                capture=True,
            ).stdout
            loc[to_datetime(stamp.strip())] = nb_info(files_by_type())
            moved = _git(project, 'reset', '--hard', 'HEAD~{}'.format(iter))
            if moved.returncode:
                # The history is exhausted; further rounds would only
                # measure the same revision again.
                break
    finally:
        # Bring the checkout back to the latest revision, even when a
        # measurement failed halfway through.
        _git(project, 'pull')
    return loc


# In[11]:


def _test_with_importnb():
    """Smoke test: walking the importnb repository yields a non-empty result."""
    assert iterate_over_the_project(('https://github.com/deathbeds/importnb', 'importnb'))


# In[16]:


if __name__ == '__main__':
    shell = get_ipython()
    # ``get_ipython`` returns None outside of an IPython session.
    if shell is not None:
        shell.run_line_magic('matplotlib', 'inline')
    # ``loc`` was never defined at module level; compute it with the same
    # arguments as the smoke test above.
    loc = iterate_over_the_project(('https://github.com/deathbeds/importnb', 'importnb'))
    Concat(loc).unstack(-1).fillna(0).sum(axis=1).plot()


# In[ ]: