#!/usr/bin/env python
# coding: utf-8

# # Pages report
# This notebook is a workaround to build individual page reports. It defines
# the main function used to display a report (`display_report(pagename)`), so
# that other notebooks can import it and explore the data computed from
# Wikipedia pages and the resulting relationships (networks of users-pages,
# pages-pages and users-users). It also includes a synthesis of time-wise
# analyses such as page-view analytics.
#
# This page is mainly used by the [page explorer](page explorer.ipynb) notebook.

# In[1]:

# NOTE(review): `codecs`, `pd` and `plt` are not imported in this file; they
# are presumably brought into the namespace by libraries.ipynb -- confirm.
get_ipython().run_line_magic('run', '"libraries.ipynb"')
get_ipython().run_line_magic('config', "InlineBackend.figure_formats=['svg']")

import networkx as nx
from IPython.display import display, HTML


# # importing datasets

# In[2]:

# list of page names (one per line; the "utf-8-sig" codec drops a leading BOM)
with codecs.open("data/pagenames.txt", "r", "utf-8-sig") as pagenames_file:
    pages = [line.strip() for line in pagenames_file.readlines()]

# page graph obtained by projecting the page-editor bi-partite graph
pages_graph = nx.read_gexf("data/pages-linked-by-coeditors.gexf")

# page-editor graph; page nodes are looked up below as "p:<page name>"
pages_editors_graph = nx.read_gexf("data/pages-editors.gexf")


# In[3]:

def _node_attrs(graph, node):
    """Return the attribute dict of *node*, whatever the networkx version."""
    try:
        return graph.nodes[node]
    except TypeError:
        # networkx 1.x: `nodes` is a method and attributes live in `node`.
        return graph.node[node]


def _short_name(node_id):
    """Return the part of a node id that follows its first ":"."""
    # maxsplit=1 keeps names that themselves contain a colon intact.
    return node_id.split(":", 1)[1]


def table_to_html(data, cols=()):
    """Build an HTML table from rows of cell values.

    data -- iterable of rows, each row an iterable of cell values.
    cols -- optional iterable of column titles for the header row.

    Cell values are inserted with "%s" and are NOT escaped, so they may
    carry markup. Returns an IPython ``HTML`` object.
    """
    parts = ["<table>", "<tr>"]
    # One-element tuples keep "%" from unpacking a cell that is a tuple.
    parts.extend("<th>%s</th>" % (title,) for title in cols)
    parts.append("</tr>")
    for row in data:
        parts.append("<tr>")
        parts.extend("<td>%s</td>" % (cell,) for cell in row)
        parts.append("</tr>")
    parts.append("</table>")
    return HTML("".join(parts))


# ## top editors

# In[4]:

def display_top_editors(page):
    """Display the ten editors with the most revisions on *page*.

    Editors are ordered by their "revisions" count on the page (edge
    attribute), ties broken by their "revisions" node attribute, both
    descending. Raises KeyError if "p:<page>" is not in the graph.
    """
    def editor_total(name):
        return _node_attrs(pages_editors_graph, name)["revisions"]

    editors = pages_editors_graph["p:%s" % (page,)]
    ranked = sorted(
        editors.items(),
        key=lambda item: (-item[1]["revisions"], -editor_total(item[0])))

    data = [[_short_name(name), edge["revisions"], editor_total(name)]
            for name, edge in ranked[:10]]

    display(table_to_html(
        data, ["editor name", "edits on that page", "edits over the corpus"]))


# Demo cell: runs whenever this notebook is executed or %run.
display_top_editors("Pi")


# ## pageviews and revisions

# In[5]:

def display_pageviews_revisions(page):
    """Plot the weekly page views and weekly revisions CSVs of *page*."""
    # read_csv(index_col=0, parse_dates=True) is the documented replacement
    # for the removed DataFrame.from_csv.
    pageviews = pd.read_csv("data/pageviews/%s.weekly.csv" % (page,),
                            index_col=0, parse_dates=True)
    revisions = pd.read_csv("data/revisions/%s.weekly.csv" % (page,),
                            index_col=0, parse_dates=True)

    pageviews.plot(figsize=(12, 2), subplots=False, linewidth=0.5,
                   ylim=0, colormap="Spectral", rot=0)
    revisions.plot(figsize=(12, 2), linewidth=0.5, ylim=0)

    plt.show()


# Demo cell: runs whenever this notebook is executed or %run.
display_pageviews_revisions("Pi")


# ## local graph

# In[6]:

def display_local_graph(page):
    """Draw *page* and its direct neighbours in the reduced co-edition map.

    Nodes of this graph are looked up by the bare page name (no "p:"
    prefix). Raises KeyError if *page* is not in the graph.
    """
    full_graph = nx.read_gexf("data/reading_maps/pages-coedited-reduced-3.gexf")

    # Neighbours are taken on the undirected view of the graph.
    neighbourhood = [page]
    neighbourhood.extend(full_graph.to_undirected()[page])

    local_graph = full_graph.subgraph(neighbourhood)

    pos = nx.spring_layout(local_graph, iterations=50)
    nx.draw_networkx_nodes(local_graph, pos)
    nx.draw_networkx_edges(local_graph, pos)
    nx.draw_networkx_labels(local_graph, pos)

    plt.axis('off')
    plt.show()


# Demo cell: runs whenever this notebook is executed or %run.
display_local_graph("Paraboloid")


# # final report

# In[7]:

_COEDITED_COLUMNS = ["page name", "editors", "co-editors",
                     "co-editors/editors", "exclusive editors", "ranking"]


def _by_coeditors(item):
    """Sort key: (neighbour, edge attrs) pairs by descending "coeditors"."""
    return -int(item[1]["coeditors"])


def _coedited_page_stats(page):
    """Return one stats dict per neighbour of *page*, most co-edited first.

    Records are fresh dicts: the edge attributes of ``pages_graph`` are
    left untouched.
    """
    page_id = "p:%s" % (page,)
    stats = []
    for name, info in sorted(pages_graph[page_id].items(), key=_by_coeditors):
        # Position of *page* in the neighbour's own co-edition ranking.
        mirror = [other for other, _ in
                  sorted(pages_graph[name].items(), key=_by_coeditors)]
        editors = pages_editors_graph[name]
        stats.append({
            "name": name,
            "coeditors": info["coeditors"],
            "editors": len(editors),
            # editors linked to exactly one node of the page-editor graph
            "exclusive editors": sum(
                1 for editor in editors
                if len(pages_editors_graph[editor]) == 1),
            "ranking": mirror.index(page_id) + 1,
        })
    return stats


def _coedited_row(entry):
    """Turn one stats dict into a table row matching _COEDITED_COLUMNS."""
    editors = entry["editors"]
    # Guard the ratio so a page without editors does not abort the report.
    ratio = float(entry["coeditors"]) / float(editors) if editors else 0.0
    return [_short_name(entry["name"]), editors, entry["coeditors"], ratio,
            entry["exclusive editors"], entry["ranking"]]


def display_report(page):
    """Display the full report of *page*.

    Sections, in order: the ten most co-edited pages, the ten neighbours in
    whose own ranking *page* sits highest, page views and revisions, top
    editors, and the local subgraph.
    """
    # NOTE(review): the heading tags below are a reconstruction (the markup
    # was lost from this copy of the file) -- confirm the intended levels.
    display(HTML("<h1>%s</h1>" % (page,)))

    display(HTML("<h3>co-edited pages</h3>"))
    stats = _coedited_page_stats(page)
    display(table_to_html([_coedited_row(entry) for entry in stats[:10]],
                          _COEDITED_COLUMNS))

    display(HTML("<h3>ranked first in</h3>"))
    # sorted() is stable: equal rankings stay in co-editor order.
    by_ranking = sorted(stats, key=lambda entry: entry["ranking"])
    display(table_to_html([_coedited_row(entry) for entry in by_ranking[:10]],
                          _COEDITED_COLUMNS))

    display(HTML("<h3>pageviews and revisions</h3>"))
    display_pageviews_revisions(page)

    display(HTML("<h3>top editors</h3>"))
    display_top_editors(page)

    display(HTML("<h3>local subgraph</h3>"))
    display_local_graph(page)


# In[8]:

if __name__ == "__main__":
    display_report("3-sphere")