#!/usr/bin/env python
# coding: utf-8
"""Exploratory look at which authors touched which files in a Git log.

This is a notebook export.  The input is a tab-separated log with the
columns ``additions``, ``deletions``, ``filename`` and ``author``
(presumably the output of ``git log --numstat`` with a custom pretty
format -- confirm against how ``git_knowledge.log`` is generated).

The helpers below are import-safe; the original notebook cells only run
when the file is executed as a script.  Values a notebook would render as
cell output are printed explicitly.
"""

import pandas as pd

GIT_LOG_PATH = "../../../spring-petclinic/git_knowledge.log"
GIT_LOG_COLUMNS = ['additions', 'deletions', 'filename', 'author']


def load_git_log(path=GIT_LOG_PATH):
    """Read the log and return one row per (change, file) with its author.

    Args:
        path: File path or file-like object handed to ``pandas.read_csv``.

    Returns:
        A DataFrame with the columns in ``GIT_LOG_COLUMNS`` that contains
        no missing values.
    """
    raw = pd.read_csv(path, names=GIT_LOG_COLUMNS, sep='\t')
    # Rows without an author inherit the most recent author seen above.
    raw['author'] = raw['author'].ffill()
    # Drop every row that still has a missing field (presumably the
    # author-only header rows -- confirm against the log format).
    return raw.dropna()


def rows_for_file(git_log, filename):
    """Return the rows of ``git_log`` whose ``filename`` equals ``filename``."""
    return git_log[git_log['filename'] == filename]


def add_author_ids(git_log):
    """Return a copy of ``git_log`` with an integer ``aid`` code per author.

    Codes are assigned by ``pandas.factorize`` in order of first appearance.
    """
    return git_log.assign(aid=pd.factorize(git_log['author'])[0])


def count_by_file_and_author(git_log):
    """Count the non-missing values per (filename, author) pair."""
    return git_log.groupby(['filename', 'author']).count()


def per_file_variance(counts):
    """Return, per file, the variance of the per-author counts.

    ``counts`` must be indexed by (filename, author), as produced by
    ``count_by_file_and_author``.  Files with a single author yield NaN
    because the sample variance of one value is undefined.
    """
    return counts.groupby(counts.index.get_level_values(0)).var()


def per_file_total(counts):
    """Return, per file, the sum of the per-author counts.

    ``counts`` must be indexed by (filename, author), as produced by
    ``count_by_file_and_author``.
    """
    return counts.groupby([counts.index.get_level_values(0)]).sum()


if __name__ == "__main__":
    # In[7]:
    git_log = load_git_log()
    print(git_log.head())

    # In[30]:
    print(rows_for_file(git_log, ".classpath"))

    # In[40]:
    print(rows_for_file(git_log, ".bowerrc"))

    # In[53]:
    # The help magic only exists inside IPython; skip it under plain Python
    # instead of failing with a NameError.
    try:
        ipython = get_ipython()  # noqa: F821 - injected by IPython
    except NameError:
        ipython = None
    if ipython is not None:
        ipython.run_line_magic('pinfo', 'pd.DataFrame.var')

    # In[90]:
    git_log = add_author_ids(git_log)
    gs = git_log.groupby('filename')['author']
    # NOTE(review): the notebook cell ended in an unfinished "gs." expression
    # (a syntax error) next to the commented-out draft
    # "pd.DataFrame(gs.ndim() / gs.count())".  Presumably a ratio such as
    # gs.nunique() / gs.count() was intended -- confirm before adding it.

    # In[70]:
    print(rows_for_file(git_log, ".bowerrc"))

    # In[79]:
    print(rows_for_file(git_log, ".editorconfig"))

    # In[42]:
    print(rows_for_file(git_log, ".gitignore"))

    # In[38]:
    gb = count_by_file_and_author(git_log)
    print(per_file_variance(gb))

    # In[18]:
    ownership = count_by_file_and_author(git_log)
    print(per_file_total(ownership))