import pandas as pd
# Load the blame log CSV into a DataFrame.
data = pd.read_csv("../demos/dataset/linux_blame_log.csv")
# Convert the author column to a categorical dtype.
data['author'] = data['author'].astype('category')
data.head()
| | path | author | timestamp | line |
---|---|---|---|---|
0 | drivers/scsi/bfa/bfad_drv.h | Anil Gurumurthy | 1448528085000000000 | 1 |
1 | drivers/scsi/bfa/bfad_drv.h | Anil Gurumurthy | 1448528085000000000 | 2 |
2 | drivers/scsi/bfa/bfad_drv.h | Anil Gurumurthy | 1448528085000000000 | 3 |
3 | drivers/scsi/bfa/bfad_drv.h | Jing Huang | 1253753175000000000 | 4 |
4 | drivers/scsi/bfa/bfad_drv.h | Anil Gurumurthy | 1448528085000000000 | 5 |
# Count the rows of the ``line`` column for every (path, author) pair.
grouped_files = (
    data
    .groupby(['path', 'author'])['line']
    .count()
)
grouped_files.head(10)
path author arch/arc/kernel/time.c Anna-Maria Gleixner 13 Daniel Lezcano 31 Noam Camus 18 Vineet Gupta 243 Viresh Kumar 6 arch/arm/common/timer-sp.c Linus Walleij 3 Michael Opdenacker 1 Rob Herring 111 Russell King 44 Stephen Boyd 3 Name: line, dtype: int64
import pygal
from pygal import Config
import os

# Shared rendering options for the per-file treemaps.
config = Config()
config.show_legend = False
config.human_readable = True
config.fill = True
config.margin = 0
# Final image size. An earlier 200x200 assignment was immediately overwritten
# by these values, so 20x20 is what always took effect.
config.width = 20
config.height = 20

# render_to_png does not create missing directories, so make sure it exists.
output_dir = 'treemaps_mini'
os.makedirs(output_dir, exist_ok=True)

# One treemap per file path (level 0 of the index); one tile per author,
# sized by that author's line count.
for path, author_counts in grouped_files.groupby(level=0):
    treemap = pygal.Treemap(config)
    filename = path.replace("/", "-").replace(".", "_").strip() + ".png"
    # Deliberately not named ``data``: rebinding that global here would
    # replace the blame DataFrame that later statements still group on.
    for row in author_counts.reset_index().itertuples(index=False):
        treemap.add(row.author, row.line)
    treemap.render_to_png(os.path.join(output_dir, filename))
# Number of distinct authors for each path.
grouped = (
    data
    .groupby('path')['author']
    .nunique()
)
grouped.head()
path drivers/scsi/bfa/bfad_im.c 15 Name: author, dtype: int64
import pygal
# Single treemap with one tile per path, sized by its distinct-author count.
treemap = pygal.Treemap()
# Series.iteritems() was removed in pandas 2.0; items() yields the same
# (index, value) pairs and is also available in older releases.
for path, author_count in grouped.items():
    treemap.add(path, author_count)
treemap.render_to_file("test.svg")