In [1]:
# %pylab pulls the numpy and matplotlib names into the interactive namespace;
# the bare plot() calls in the cells below rely on it.
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [2]:
# Database access
from vizgrimoire.metrics.query_builder import SCMQuery, ITSQuery
# Filters to apply
from vizgrimoire.metrics.metrics_filter import MetricFilters
# Let's start playing with git activity metrics
import vizgrimoire.metrics.scm_metrics as scm
In [3]:
# Open the connection used by every source-code (SCM) metric below.
# Data: OpenStack source code database (MySQL dump) published at
#   http://activity.openstack.org/dash/browser/data/db/source_code.mysql.7z
# restored locally as "openstack_source_code_fosdem2015".
import os

# Credentials are read from the environment so the notebook can be pointed at
# a secured MySQL server without saving a password in it. The fallbacks
# reproduce the original local demo setup (user "root", empty password).
user = os.environ.get("GRIMOIRE_DB_USER", "root")
password = os.environ.get("GRIMOIRE_DB_PASSWORD", "")
source_code_db = "openstack_source_code_fosdem2015"
# In this dump the identities tables live in the same schema as the commits.
identities_db = "openstack_source_code_fosdem2015"

dbcon = SCMQuery(user, password, source_code_db, identities_db)
In [4]:
# Instantiate some filters to play with.
# All three cover 2014-01-01 up to (not including) 2015-01-01, sampled monthly.
period = MetricFilters.PERIOD_MONTH
# The dates carry their own single quotes: they appear verbatim in the
# generated SQL (see the query printed by _get_sql further down).
startdate = "'2014-01-01'"
enddate = "'2015-01-01'"

# basic filter: period and date range only
filters = MetricFilters(period, startdate, enddate)
# company filter: same range, restricted to "Red Hat"
filters_company = MetricFilters(period, startdate, enddate)
filters_company.add_filter(MetricFilters.COMPANY, "Red Hat")
# company and repo filter: "Red Hat" activity in the "nova.git" repository
filters_repo_com = MetricFilters(period, startdate, enddate)
filters_repo_com.add_filter(MetricFilters.COMPANY, "Red Hat")
filters_repo_com.add_filter(MetricFilters.REPOSITORY, "nova.git")

Playing with commits

In [5]:
# Retrieving data for each filter.
# Let's start with commits: build the metric with the basic (unrestricted)
# filter and ask for the aggregated value, a single total for the date range.
commits = scm.Commits(dbcon, filters)
commits.get_agg()
Out[5]:
{'commits': 53034L}
In [6]:
# Same metric as a time series: parallel lists with one entry per month
# ('commits' values plus 'date', 'id', 'month' and 'unixtime' labels).
commits.get_ts()
Out[6]:
{'commits': [4826L,
  5273L,
  5182L,
  4318L,
  3645L,
  4589L,
  4824L,
  4970L,
  5050L,
  4115L,
  3006L,
  3236L],
 'date': ['Jan 2014',
  'Feb 2014',
  'Mar 2014',
  'Apr 2014',
  'May 2014',
  'Jun 2014',
  'Jul 2014',
  'Aug 2014',
  'Sep 2014',
  'Oct 2014',
  'Nov 2014',
  'Dec 2014'],
 'id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
 'month': [24169L,
  24170L,
  24171L,
  24172L,
  24173L,
  24174L,
  24175L,
  24176L,
  24177L,
  24178L,
  24179L,
  24180L],
 'unixtime': [u'1388534400',
  u'1391212800',
  u'1393632000',
  u'1396310400',
  u'1398902400',
  u'1401580800',
  u'1404172800',
  u'1406851200',
  u'1409529600',
  u'1412121600',
  u'1414800000',
  u'1417392000']}
In [7]:
# Plot the monthly commit counts with a title, axis labels and the month
# names on the x axis, so the figure can be read on its own.
commits_ts = commits.get_ts()  # fetched once, reused for values and tick labels
lines = plot(commits_ts["id"], commits_ts["commits"])
# 'id' and 'date' are parallel lists, so each tick gets its month name.
xticks(commits_ts["id"], commits_ts["date"], rotation=45)
title("Commits per month")
xlabel("Month")
ylabel("Commits")
lines  # keep the plotted lines as the cell's displayed value
Out[7]:
[<matplotlib.lines.Line2D at 0x7fd85ad018d0>]
In [8]:
# Let's use another filter: the same Commits metric, restricted by the
# "Red Hat" company filter defined above.
commits_redhat = scm.Commits(dbcon, filters_company)
In [9]:
# Aggregated commit count under the company filter.
commits_redhat.get_agg()
Out[9]:
{'commits': 6551L}
In [10]:
# Plot the monthly commit counts for the company filter, labeled so the
# figure can be read on its own.
commits_redhat_ts = commits_redhat.get_ts()  # fetched once, reused for labels
lines = plot(commits_redhat_ts["id"], commits_redhat_ts["commits"])
# 'id' and 'date' are parallel lists, so each tick gets its month name.
xticks(commits_redhat_ts["id"], commits_redhat_ts["date"], rotation=45)
title("Red Hat commits per month")
xlabel("Month")
ylabel("Commits")
lines  # keep the plotted lines as the cell's displayed value
Out[10]:
[<matplotlib.lines.Line2D at 0x7fd85abee350>]
In [11]:
# Let's focus on an organization and a repository
commits_redhat_nova = scm.Commits(dbcon, filters_repo_com)
# Show how the two add_filter() calls were combined inside the filter object.
filters_repo_com.type_analysis
Out[11]:
['company,repository', "'Red Hat','nova.git'"]
In [12]:
# Example of the SQL behind the metric.
# NOTE(review): _get_sql is a private method; with True the query shown below
# groups by year/month, so the flag presumably selects the time-series form --
# TODO confirm against the Commits implementation.
commits_redhat_nova._get_sql(True)
Out[12]:
"SELECT YEAR( s.author_date )*12+MONTH( s.author_date ) AS month, count(distinct(s.rev)) as commits FROM openstack_source_code_fosdem2015.upeople_companies upcom , openstack_source_code_fosdem2015.companies com , scmlog s , people_upeople pup , repositories r WHERE  s.author_date >='2014-01-01' AND  s.author_date <'2015-01-01' AND r.name ='nova.git' and s.id IN (select distinct(a.commit_id) from actions a) and upcom.company_id = com.id and com.name ='Red Hat' and s.author_date >= upcom.init and pup.upeople_id = upcom.upeople_id and r.id = s.repository_id and s.author_id = pup.people_id and s.author_date < upcom.end GROUP BY  YEAR( s.author_date ),MONTH( s.author_date ) ORDER BY YEAR( s.author_date ),MONTH( s.author_date )"
In [13]:
# Plot the monthly commit counts for the company + repository filter,
# labeled so the figure can be read on its own.
commits_redhat_nova_ts = commits_redhat_nova.get_ts()  # fetched once
lines = plot(commits_redhat_nova_ts["id"], commits_redhat_nova_ts["commits"])
# 'id' and 'date' are parallel lists, so each tick gets its month name.
xticks(commits_redhat_nova_ts["id"], commits_redhat_nova_ts["date"], rotation=45)
title("Red Hat commits to nova.git per month")
xlabel("Month")
ylabel("Commits")
lines  # keep the plotted lines as the cell's displayed value
Out[13]:
[<matplotlib.lines.Line2D at 0x7fd85ab2a990>]

Playing with Authors

In [14]:
# Authors metric over the same basic filter that was used for commits.
authors = scm.Authors(dbcon, filters)
In [15]:
# Aggregated authors count for the whole date range.
authors.get_agg()
Out[15]:
{'authors': 1994L}
In [16]:
# Monthly evolution of the authors metric.
authors_ts = authors.get_ts()
plot(authors_ts["authors"])
Out[16]:
[<matplotlib.lines.Line2D at 0x7fd85aa56a50>]
In [17]:
# Let's list the top 10 authors; the result comes back ordered by number of
# commits (automated accounts such as 'Jenkins' are still included here).
authors.get_list()
Out[17]:
{'authors': ['Jenkins',
  'OpenStack Project Creator',
  'Andreas Jaeger',
  'OpenStack Jenkins',
  'Jeremy Stanley',
  'James E. Blair',
  'Sean Dague',
  'Clark Boylan',
  'Kevin Benton',
  'Christian Berendt'],
 'commits': [39333L, 2548L, 1605L, 930L, 903L, 875L, 806L, 702L, 654L, 560L],
 'id': [2L, 239L, 194L, 135L, 294L, 15L, 49L, 7L, 567L, 361L]}
In [18]:
# Let's remove some bots: these names are to be left out of the results.
# NOTE(review): this mutates the `filters` object that was already handed to
# `commits` and `authors`; if those keep a reference, their results change
# too -- TODO confirm and consider a separate filter object.
filters.people_out = ["Jenkins","OpenStack Jenkins", "OpenStack Project Creator"]
In [19]:
# Re-create the Authors metric now that people_out is set on the filter.
authors = scm.Authors(dbcon, filters)
In [20]:
# Top authors again, this time without the excluded accounts.
authors.get_list()
Out[20]:
{'authors': ['Andreas Jaeger',
  'Jeremy Stanley',
  'James E. Blair',
  'Sean Dague',
  'Clark Boylan',
  'Kevin Benton',
  'Christian Berendt',
  'Joshua Harlow',
  'Brant Knudson',
  'Joe Gordon'],
 'commits': [1605L, 903L, 875L, 806L, 702L, 654L, 560L, 537L, 514L, 508L],
 'id': [194L, 294L, 15L, 49L, 7L, 567L, 361L, 159L, 391L, 36L]}

More in depth analysis: community structure

In [21]:
# This community structure analysis characterizes developers
# by their activity:
#  - core developers are those committing 80% of the changes,
#  - regular developers are those committing the next 15% of the changes,
#  - occasional developers are those committing the remaining 5%.
from vizgrimoire.analysis.onion_model import CommunityStructure
In [22]:
# Run the analysis on the SCM connection with the current `filters` object
# (the one whose people_out was set above).
# NOTE(review): the three groups printed below add up to 1994, the authors
# total obtained before the exclusion, so it is unclear whether people_out is
# honored here -- TODO confirm.
onion = CommunityStructure(dbcon, filters)
In [23]:
# Number of developers in each group. Written as a call so the cell runs on
# both Python 2 and Python 3; the printed text is the same.
print(onion.result())
{'occasional': 1247, 'core': 224, 'regular': 523}

Let's focus on ticketing activity (and efficiency)

In [24]:
import vizgrimoire.metrics.its_metrics as its
In [25]:
# Open the connection used by the ticketing (ITS) metrics below.
# Data: OpenStack tickets database (MySQL dump) published at
#   http://activity.openstack.org/dash/browser/data/db/tickets.mysql.7z
# restored locally as "openstack_tickets_fosdem2015".
import os

# Credentials are read from the environment so the notebook can be pointed at
# a secured MySQL server without saving a password in it. The fallbacks
# reproduce the original local demo setup (user "root", empty password).
user = os.environ.get("GRIMOIRE_DB_USER", "root")
password = os.environ.get("GRIMOIRE_DB_PASSWORD", "")
# The variable name is kept from the SCM cell, but from here on it holds the
# tickets database; identities still come from the source-code database.
source_code_db = "openstack_tickets_fosdem2015"
identities_db = "openstack_source_code_fosdem2015"

# This rebinds `dbcon`: from this cell on it is the ITS connection, so the SCM
# cells above need the SCM connection cell re-run before they are run again.
dbcon = ITSQuery(user, password, source_code_db, identities_db)
In [30]:
# Instantiate some filters to play with.
# NOTE(review): the execution counter jumps from In [25] to In [30] here, so
# this cell was not run straight after the previous one; check that the
# notebook still works with Restart & Run All.
# The ticket analysis uses a different window (2010-01-01 up to 2012-01-01)
# than the source-code one, and rebinds period/startdate/enddate/filters.
period = MetricFilters.PERIOD_MONTH
startdate = "'2010-01-01'"
enddate = "'2012-01-01'"

# basic filter
filters = MetricFilters(period, startdate, enddate)
In [31]:
# Select the tracker backend, then create the closed/opened ticket metrics.
# NOTE(review): "lp" presumably stands for Launchpad -- TODO confirm.
from vizgrimoire.ITS import ITS
ITS.set_backend("lp")
closed_tickets = its.Closed(dbcon, filters)
opened_tickets = its.Opened(dbcon, filters)
In [32]:
# Aggregated count of closed tickets for the filter's date range.
closed_tickets.get_agg()
Out[32]:
{'closed': 2038L}
In [33]:
# Aggregated count of opened tickets for the filter's date range.
opened_tickets.get_agg()
Out[33]:
{'opened': 4040L}
In [34]:
# Let's see the efficiency of the community at closing tickets (BMI index).
# This is calculated as the percentage of tickets closed out of the
# opened ones. Values over 100 indicate that the community is closing
# more tickets than it is opening.
bmi = its.BMIIndex(dbcon, filters)
plot(bmi.get_ts()["bmitickets"])
Out[34]:
[<matplotlib.lines.Line2D at 0x7fd85a8d0590>]
In [35]:
# Let's look for a specific project: all integrated projects in OpenStack.
# The filter is rebuilt from period/startdate/enddate instead of being
# extended in place: add_filter() accumulates entries, so re-running this cell
# on the old object would add the project a second time, and the same object
# was already handed to closed_tickets, opened_tickets and bmi.
filters = MetricFilters(period, startdate, enddate)
filters.add_filter(filters.PROJECT, "integrated")
In [36]:
# Re-create the BMI metric with the project-restricted filter.
bmi = its.BMIIndex(dbcon, filters)
In [37]:
# Monthly BMI values for the project-restricted filter.
bmi_ts = bmi.get_ts()
plot(bmi_ts["bmitickets"])
Out[37]:
[<matplotlib.lines.Line2D at 0x7fd85a87ddd0>]