%pylab inline import requests import pandas as pd import numpy as np import matplotlib.pyplot as plt from StringIO import StringIO from datetime import datetime bugs_url = "http://bugs.mysql.com/search-csv.php?status=all&severity=all&mine=9242646" bugs_req = requests.get(bugs_url) csv_file = StringIO() csv_file.write(bugs_req.text) csv_file.seek(0) bugsdf = pd.read_csv(csv_file) bugsdf.head() fig = plt.figure(figsize=(8,10), dpi=100) bugsdf.Type.value_counts(ascending=True).plot(kind='barh') def getversion(inputver): if isinstance(inputver, float): return inputver return inputver[:3] bugsdf['major_version'] = bugsdf.Version.map(getversion) compstat = bugsdf.groupby('major_version').Type.value_counts().unstack().T fig = plt.figure(figsize=(15,10), dpi=100) plt.pcolor(compstat, vmin=0, vmax=5, cmap='Blues') plt.yticks(np.arange(0.5, len(compstat.index), 1), compstat.index) plt.xticks(np.arange(0.5, len(compstat.columns), 1), compstat.columns) fig = plt.figure(figsize=(8,10), dpi=100) plt.pcolor(compstat, vmin=0, vmax=10, cmap='Blues') plt.yticks(np.arange(0.5, len(compstat.index), 1), compstat.index) plt.xticks(np.arange(0.5, len(compstat.columns), 1), compstat.columns) bugsdf[bugsdf.major_version == '5.6'].Status.value_counts().plot(kind='bar') bugsdf.groupby('major_version').Status.value_counts().unstack().plot(kind='barh', stacked=True) bugsdf[bugsdf.Status != 'Closed'].groupby('major_version').Status.value_counts().unstack().plot(kind='bar', stacked=True) bugsdf['Entered'] = pd.to_datetime(bugsdf.Entered) bugsdf['Modified'] = pd.to_datetime(bugsdf.Modified) bugsdf[-bugsdf.Status.isin(['Closed', 'Duplicate', 'Won\'t fix', 'Can\'t repeat'])].sort(columns='Entered')[:5] bugsdf[-bugsdf.Status.isin(['Closed', 'Duplicate', 'Won\'t fix', 'Can\'t repeat'])] \ [bugsdf.Modified < datetime(2013, 6, 1)] \ [bugsdf.Modified > datetime(1970, 1, 1)]