#!/usr/bin/env python
# coding: utf-8
# # Pages report
# This notebook is a workaround to build individual page reports. It defines the main functions used to display a report (`display_report(pagename)`), which allows other notebooks to import them and explore the data computed from Wikipedia pages and the derived relationships (networks of users-pages, pages-pages and users-users). It also includes a synthesis of time-wise analyses such as page-view analytics.
#
# This page is mainly used by the [page explorer](page explorer.ipynb) notebook.
# In[1]:
# Run the shared "libraries.ipynb" notebook through the IPython %run magic.
# Names used below without a local import (codecs, pd, plt) are presumably
# defined there -- TODO confirm.
get_ipython().run_line_magic('run', '"libraries.ipynb"')
# Ask the inline matplotlib backend to emit figures as SVG.
get_ipython().run_line_magic('config', "InlineBackend.figure_formats=['svg']")
import networkx as nx
from IPython.display import display, HTML
# # importing datasets
# In[2]:
# list of page names, one per line ("utf-8-sig" drops a leading BOM if present)
with codecs.open("data/pagenames.txt", "r", "utf-8-sig") as pagenames_file:
    # Build a real list (not a lazy `map` object) so that `pages` can be
    # indexed and iterated more than once on any Python version, and close
    # the file handle as soon as the names are read.
    pages = [line.strip() for line in pagenames_file.readlines()]
# page graph obtained by projecting page-editor bi-partite graph
pages_graph = nx.read_gexf("data/pages-linked-by-coeditors.gexf")
# page-editor graph: the neighbours of a "p:<page>" node are read below as
# the editors of that page
pages_editors_graph = nx.read_gexf("data/pages-editors.gexf")
# In[3]:
def table_to_html(data, cols=()):
    """Build an HTML table and wrap it in an ``IPython.display.HTML`` object.

    Args:
        data: iterable of rows; each row is an iterable of cell values.
        cols: iterable of column headings, rendered as the first table row.
            Defaults to an empty (immutable) tuple, which yields an empty
            heading row.

    Returns:
        An ``HTML`` display object holding the table markup.

    Cell values are interpolated as-is with ``%s`` (no HTML escaping), so a
    caller can pass markup as a cell value.
    """
    # NOTE(review): the tag names (table / tr / th / td) were reconstructed
    # from the structure of the function -- confirm against the notebook.
    parts = ["<table>", "<tr>"]
    # One-element tuples keep "%s" correct even when a value is itself a tuple.
    parts.extend("<th>%s</th>" % (heading,) for heading in cols)
    parts.append("</tr>")
    for row in data:
        parts.append("<tr>")
        parts.extend("<td>%s</td>" % (cell,) for cell in row)
        parts.append("</tr>")
    parts.append("</table>")
    return HTML("".join(parts))
# ## top editors
# In[4]:
def display_top_editors(page):
    """Display a table of the ten most active editors of *page*.

    The editors are the neighbours of the ``"p:<page>"`` node in
    ``pages_editors_graph``. They are ordered by the ``revisions`` attribute
    of the page-editor edge (descending); ties are broken by the
    ``revisions`` attribute of the editor node itself (descending).

    Args:
        page: page name without the ``"p:"`` prefix.

    Raises:
        KeyError: if ``"p:<page>"`` is not a node of ``pages_editors_graph``.
    """
    # NOTE(review): `.node` is the node-attribute accessor of older networkx
    # releases; newer releases only provide `.nodes` -- confirm the pinned
    # networkx version before changing it.
    editors = pages_editors_graph["p:%s" % (page,)]
    ranked = sorted(
        editors.items(),
        key=lambda item: (-item[1]["revisions"],
                          -pages_editors_graph.node[item[0]]["revisions"]))

    data = [
        [
            # Split on the first colon only, so a name that itself contains
            # colons (e.g. an IPv6 address) is kept whole. The unicode
            # template avoids an ASCII encode error on non-ASCII names under
            # Python 2.
            u"{0}".format(name.split(":", 1)[1]),
            edits["revisions"],
            pages_editors_graph.node[name]["revisions"],
        ]
        for name, edits in ranked[:10]
    ]
    display(table_to_html(data, ["editor name", "edits on that page", "edits over the corpus"]))


display_top_editors("Pi")
# ## pageviews and revisions
# In[5]:
def display_pageviews_revisions(page):
    """Plot the weekly page-view and revision series of *page*.

    Reads ``data/pageviews/<page>.weekly.csv`` and
    ``data/revisions/<page>.weekly.csv``, plots each frame and shows the
    result.

    Args:
        page: page name used to build the two CSV file names.
    """
    # `DataFrame.from_csv` is gone from current pandas; `read_csv` with the
    # first column as a date-parsed index is its documented equivalent.
    pageviews = pd.read_csv("data/pageviews/%s.weekly.csv" % (page,),
                            index_col=0, parse_dates=True)
    revisions = pd.read_csv("data/revisions/%s.weekly.csv" % (page,),
                            index_col=0, parse_dates=True)

    # ylim=0 pins the lower bound of the y axis at zero.
    pageviews.plot(figsize=(12, 2), subplots=False, linewidth=0.5, ylim=0,
                   colormap="Spectral", rot=0)
    revisions.plot(figsize=(12, 2), linewidth=0.5, ylim=0)
    plt.show()


display_pageviews_revisions("Pi")
# ## local graph
# In[6]:
def display_local_graph(page):
    """Draw *page* and its direct neighbours from the reduced co-edition map.

    The map is read from ``data/reading_maps/pages-coedited-reduced-3.gexf``.
    The neighbours are taken from an undirected copy of it, and the induced
    subgraph is drawn with a spring layout (nodes, edges, labels) on hidden
    axes.

    Args:
        page: node id of the page in the reading map.
    """
    reading_map = nx.read_gexf("data/reading_maps/pages-coedited-reduced-3.gexf")

    # The page itself comes first, followed by every node adjacent to it.
    neighbourhood = [page] + list(reading_map.to_undirected()[page])
    local = reading_map.subgraph(neighbourhood)

    layout = nx.spring_layout(local, iterations=50)
    for draw in (nx.draw_networkx_nodes,
                 nx.draw_networkx_edges,
                 nx.draw_networkx_labels):
        draw(local, layout)

    plt.axis('off')
    plt.show()


display_local_graph("Paraboloid")
# # final report
# In[7]:
# Render the full report for `page`, in this order: a title, a "co-edited
# pages" table, a "ranked first in" table, the pageviews/revisions plots, the
# top-editors table and the local subgraph drawing.
# `page` is the page name without the "p:" prefix used for node ids.
# NOTE(review): several string literals below are split across physical
# lines, which is not valid for single-quoted Python strings; they look like
# HTML markup was lost -- restore them from the original notebook.
def display_report(page):
display(HTML("%s
" % (page)))
#display(HTML(""))
display(HTML("
co-edited pages
"))
# Neighbours of the page in pages_graph, highest "coeditors" value first.
# NOTE: `lambda (k,x): ...` is tuple-parameter syntax, accepted by Python 2
# only (removed by PEP 3113).
nb = sorted(pages_graph["p:%s" % (page)].items(),
key=lambda (k,x): -int(x["coeditors"]))
data = []
# calculate rank in neighbor top co-edited ranking
# For every neighbour: sort the neighbour's own neighbours the same way and
# store the 1-based position of `page` in that list as "ranking"; also store
# its number of editors and of "exclusive" editors (editors with exactly one
# neighbour in pages_editors_graph).
# NOTE(review): `info` is the mapping yielded by pages_graph[...].items(), so
# these keys are presumably written onto the graph's edge data -- confirm.
for name, info in nb:
nb_mirror = sorted(pages_graph[name].items(),
key=lambda (k,x): -int(x["coeditors"]))
nb_mirror = [ x[0] for x in nb_mirror ]
editors = pages_editors_graph[name]
info["editors"] = len(editors)
info["exclusive editors"] = len([n for n in editors if len(pages_editors_graph[n]) == 1 ])
info["ranking"] = nb_mirror.index("p:%s" % (page)) + 1
#print nb
# First table: the ten neighbours with the most co-editors.
# name.split(":")[1] keeps only the text between the first and second colon.
# The co-editors/editors ratio raises ZeroDivisionError when "editors" is 0.
for name, info in nb[0:10]:
data.append([ u"
{0}".format(name.split(":")[1]),
info["editors"],
info["coeditors"],
float(info["coeditors"]) / float(info["editors"]),
info["exclusive editors"],
info["ranking"]])
display(table_to_html(data, ["page name", "editors", "co-editors", "co-editors/editors", "exclusive editors" ,"ranking"]))
#display(HTML("
"))
#display(HTML(""))
display(HTML("
ranked first in
"))
# NOTE: nb_list is assigned but not read anywhere in this function.
nb_list = [ x[0] for x in nb ]
data = []
# Second table: the ten neighbours in whose own ranking `page` sits highest
# (smallest "ranking" value first).
nb2 = sorted(nb, key=lambda (x): x[1]["ranking"])
for name, info in nb2[0:10]:
# "editors" and "exclusive editors" are recomputed here although the loop
# over `nb` above already stored them for every neighbour.
editors = pages_editors_graph[name]
info["editors"] = len(editors)
info["exclusive editors"] = len([n for n in editors if len(pages_editors_graph[n]) == 1 ])
data.append([ u"
{0}".format(name.split(":")[1]),
info["editors"],
info["coeditors"],
float(info["coeditors"]) / float(info["editors"]),
info["exclusive editors"],
info["ranking"]])
display(table_to_html(data, ["page name", "editors", "co-editors", "co-editors/editors", "exclusive editors" ,"ranking"]))
# display(HTML("
"))
# display(HTML(""))
# Remaining sections delegate to the display helpers defined earlier in this file.
display(HTML("pageviews and revisions
"))
display_pageviews_revisions(page)
display(HTML("top editors
"))
display_top_editors(page)
display(HTML("local subgraph
"))
display_local_graph(page)
#display_report("3-sphere")
# In[8]:
# Render a sample report for the "3-sphere" page when this file is executed
# as the main module.
if __name__ == "__main__":
display_report("3-sphere")