%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Database access
from vizgrimoire.metrics.query_builder import SCMQuery, ITSQuery
# Filters to apply
from vizgrimoire.metrics.metrics_filter import MetricFilters
# Let's start playing with git activity metrics
import vizgrimoire.metrics.scm_metrics as scm
# Instantiate database access.
# This example uses the OpenStack source code database (MySQL dump) from
# http://activity.openstack.org/dash/browser/data/db/source_code.mysql.7z
# The database is expected to be named openstack_source_code_fosdem2015.
user = "root"
password = ""
# The same database name is used for both source code and identities.
source_code_db = "openstack_source_code_fosdem2015"
identities_db = "openstack_source_code_fosdem2015"
# Connection object passed to every SCM metric created below.
dbcon = SCMQuery(user, password, source_code_db, identities_db)
# Instantiate some filters to play with.
# All three filters below share the same period and date range.
period = MetricFilters.PERIOD_MONTH
# The dates are strings that already carry their own single quotes.
startdate = "'2014-01-01'"
enddate = "'2015-01-01'"
# Basic filter: only period and dates
filters = MetricFilters(period, startdate, enddate)
# Company filter: basic filter plus the company "Red Hat"
filters_company = MetricFilters(period, startdate, enddate)
filters_company.add_filter(MetricFilters.COMPANY, "Red Hat")
# Company and repository filter: company "Red Hat" plus repository "nova.git"
filters_repo_com = MetricFilters(period, startdate, enddate)
filters_repo_com.add_filter(MetricFilters.COMPANY, "Red Hat")
filters_repo_com.add_filter(MetricFilters.REPOSITORY, "nova.git")
# Retrieving data for each filter.
# Let's start with commits
commits = scm.Commits(dbcon, filters)
commits.get_agg()
{'commits': 53034L}
commits.get_ts()
{'commits': [4826L, 5273L, 5182L, 4318L, 3645L, 4589L, 4824L, 4970L, 5050L, 4115L, 3006L, 3236L], 'date': ['Jan 2014', 'Feb 2014', 'Mar 2014', 'Apr 2014', 'May 2014', 'Jun 2014', 'Jul 2014', 'Aug 2014', 'Sep 2014', 'Oct 2014', 'Nov 2014', 'Dec 2014'], 'id': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 'month': [24169L, 24170L, 24171L, 24172L, 24173L, 24174L, 24175L, 24176L, 24177L, 24178L, 24179L, 24180L], 'unixtime': [u'1388534400', u'1391212800', u'1393632000', u'1396310400', u'1398902400', u'1401580800', u'1404172800', u'1406851200', u'1409529600', u'1412121600', u'1414800000', u'1417392000']}
plot(commits.get_ts()["commits"])
[<matplotlib.lines.Line2D at 0x7fd85ad018d0>]
# Let's use another filter
commits_redhat = scm.Commits(dbcon, filters_company)
commits_redhat.get_agg()
{'commits': 6551L}
plot(commits_redhat.get_ts()["commits"])
[<matplotlib.lines.Line2D at 0x7fd85abee350>]
# Let's focus on an organization and a repository
commits_redhat_nova = scm.Commits(dbcon, filters_repo_com)
filters_repo_com.type_analysis
['company,repository', "'Red Hat','nova.git'"]
# Example of the SQL built for this metric
commits_redhat_nova._get_sql(True)
"SELECT YEAR( s.author_date )*12+MONTH( s.author_date ) AS month, count(distinct(s.rev)) as commits FROM openstack_source_code_fosdem2015.upeople_companies upcom , openstack_source_code_fosdem2015.companies com , scmlog s , people_upeople pup , repositories r WHERE s.author_date >='2014-01-01' AND s.author_date <'2015-01-01' AND r.name ='nova.git' and s.id IN (select distinct(a.commit_id) from actions a) and upcom.company_id = com.id and com.name ='Red Hat' and s.author_date >= upcom.init and pup.upeople_id = upcom.upeople_id and r.id = s.repository_id and s.author_id = pup.people_id and s.author_date < upcom.end GROUP BY YEAR( s.author_date ),MONTH( s.author_date ) ORDER BY YEAR( s.author_date ),MONTH( s.author_date )"
plot(commits_redhat_nova.get_ts()["commits"])
[<matplotlib.lines.Line2D at 0x7fd85ab2a990>]
# Same approach for authors, using the basic filter
authors = scm.Authors(dbcon, filters)
authors.get_agg()
{'authors': 1994L}
plot(authors.get_ts()["authors"])
[<matplotlib.lines.Line2D at 0x7fd85aa56a50>]
# Let's list the top 10 authors
authors.get_list()
{'authors': ['Jenkins', 'OpenStack Project Creator', 'Andreas Jaeger', 'OpenStack Jenkins', 'Jeremy Stanley', 'James E. Blair', 'Sean Dague', 'Clark Boylan', 'Kevin Benton', 'Christian Berendt'], 'commits': [39333L, 2548L, 1605L, 930L, 903L, 875L, 806L, 702L, 654L, 560L], 'id': [2L, 239L, 194L, 135L, 294L, 15L, 49L, 7L, 567L, 361L]}
# Let's remove some bots.
# people_out is set on the existing filters object (not on a new one), and
# the Authors metric is rebuilt from it before listing the top authors again.
filters.people_out = ["Jenkins","OpenStack Jenkins", "OpenStack Project Creator"]
authors = scm.Authors(dbcon, filters)
authors.get_list()
{'authors': ['Andreas Jaeger', 'Jeremy Stanley', 'James E. Blair', 'Sean Dague', 'Clark Boylan', 'Kevin Benton', 'Christian Berendt', 'Joshua Harlow', 'Brant Knudson', 'Joe Gordon'], 'commits': [1605L, 903L, 875L, 806L, 702L, 654L, 560L, 537L, 514L, 508L], 'id': [194L, 294L, 15L, 49L, 7L, 567L, 361L, 159L, 391L, 36L]}
# This community structure analysis characterizes developers
# by their activity.
# Core developers are those committing 80% of the changes.
# Regular developers are those committing the next 15% of the changes.
# Occasional developers are those committing the remaining 5%.
from vizgrimoire.analysis.onion_model import CommunityStructure
# Build the analysis from the current connection and filters, then print
# its result (a dict with one count per developer group).
onion = CommunityStructure(dbcon, filters)
# The parenthesized form prints the same text under Python 2 and also
# parses under Python 3, unlike the bare print statement.
print(onion.result())
{'occasional': 1247, 'core': 224, 'regular': 523}
import vizgrimoire.metrics.its_metrics as its
# Instantiate database access.
# This part uses the OpenStack tickets database (MySQL dump) from
# http://activity.openstack.org/dash/browser/data/db/tickets.mysql.7z
# The database is expected to be named openstack_tickets_fosdem2015.
user = "root"
password = ""
# NOTE: source_code_db is reused here and now holds the *tickets* database name.
source_code_db = "openstack_tickets_fosdem2015"
# Identities are still taken from the source code database.
identities_db = "openstack_source_code_fosdem2015"
# dbcon is rebound: from here on it is the ITS connection, not the SCM one.
dbcon = ITSQuery(user, password, source_code_db, identities_db)
# Instantiate some filters to play with.
# The date range here ('2010-01-01' to '2012-01-01') differs from the one
# used in the git part above.
period = MetricFilters.PERIOD_MONTH
startdate = "'2010-01-01'"
enddate = "'2012-01-01'"
# Basic filter; this rebinds `filters`, replacing the object used in the git part
filters = MetricFilters(period, startdate, enddate)
from vizgrimoire.ITS import ITS
# Select the ITS backend; the ticket metrics below are created after this call
# ("lp" presumably stands for Launchpad -- TODO confirm).
ITS.set_backend("lp")
# Closed and opened ticket metrics, both built from the basic filter
closed_tickets = its.Closed(dbcon, filters)
opened_tickets = its.Opened(dbcon, filters)
closed_tickets.get_agg()
{'closed': 2038L}
opened_tickets.get_agg()
{'opened': 4040L}
# Let's see the efficiency of the community at closing tickets (BMI index).
# This is calculated as the percentage of tickets closed out of the
# opened ones. Values over 100 indicate that the community is closing
# more tickets than it is opening.
bmi = its.BMIIndex(dbcon, filters)
plot(bmi.get_ts()["bmitickets"])
[<matplotlib.lines.Line2D at 0x7fd85a8d0590>]
# Let's look at a specific project: all integrated projects in OpenStack.
# NOTE(review): the filter is added to the existing `filters` object, which
# was also passed to closed_tickets/opened_tickets above -- presumably they
# would see the project filter too if evaluated again; confirm before reuse.
# The filter type is referenced through the class, as is done for
# MetricFilters.COMPANY and MetricFilters.REPOSITORY in the git part.
filters.add_filter(MetricFilters.PROJECT, "integrated")
bmi = its.BMIIndex(dbcon, filters)
plot(bmi.get_ts()["bmitickets"])
[<matplotlib.lines.Line2D at 0x7fd85a87ddd0>]