%load_ext autoreload
%autoreload 2
%store -r the_page
if 'the_page' not in locals():
import pickle
print("Loading default data...")
the_page = pickle.load(open("data/the_page.p",'rb'))
You have just opened a collection of notebooks that lets you inspect the evolution of the revision history of a Wikipedia article up to now (from the English-language edition). It also allows you to highlight article- or word-specific conflicts as well as the productivity of any given editor.
Specifically, for the notebooks after this initial one, it interfaces with the API of a specialized service called WikiWho, which provides fine-grained change information about the tokens (words) in an article.
It is written in a way that you can explore it like a Web app, without interacting with the code behind it, or - if you choose to - click on "edit app" in the Jupyter navigation bar and play around with the code yourself.
The default introduction example is the article "The Camp of the Saints" (a novel), which we recommend to start with. You can enter/search an article of your choice and explore it as well.
Let's first get live data of some general statistics from Wikipedia's own API and a service called Xtools:
from IPython.display import display, Markdown as md
# Section A heading: a horizontal rule, the title, and the search prompt,
# rendered in that order.
display(
    md("---"),
    md("# A. Basic Info from Wikipedia"),
    md("***Search for an article on the English Wikipedia***"),
)
from ipywidgets import widgets, Output
from IPython.display import display, clear_output
from external.wikipedia import WikipediaDV, WikipediaAPI
# Data-view wrapper around the API client for the English Wikipedia.
wikipedia_api = WikipediaAPI(domain='en.wikipedia.org')
wikipedia_dv = WikipediaDV(wikipedia_api)
# the method that listens to the click event
def on_button_clicked(b):
global the_page
# use the out widget so the output is overwritten when two or more
# searches are performed
with out:
try:
# query wikipedia
search_result = wikipedia_dv.search_page(searchTerm.value)
the_page = wikipedia_dv.get_page(search_result)
%store the_page
clear_output()
display(the_page.to_frame('value'))
display(md(f'You selected:'))
display(the_page['title'])
except:
clear_output()
display(md(f'The page title *"{searchTerm.value}"* was not found'))
# by default display the last search
try:
    searchTerm = widgets.Text(the_page['title'], description='Page title:')
except Exception:
    # No usable previous page (e.g. `the_page` is undefined or has no
    # title): fall back to the introductory example. `Exception` instead of
    # a bare `except` so that KeyboardInterrupt/SystemExit are not swallowed.
    searchTerm = widgets.Text("The Camp of the Saints", description='Page title:')

# create and display the button
button = widgets.Button(description="Search")
example = md("e.g. *The Camp of the Saints*")
display(searchTerm, example, button)

# the output widget is used to remove the output after the search field
out = Output()
display(out)

# set the event
button.on_click(on_button_clicked)

# trigger the event with the default value
on_button_clicked(button)
Text(value='The Camp of the Saints', description='Page title:')
e.g. The Camp of the Saints
Button(description='Search', style=ButtonStyle())
Output()
from ipywidgets import widgets
from IPython.display import display, Javascript
def run_below(ev):
    """Ask the notebook front end to execute every cell below this one.

    Args:
        ev: The widget event that triggered the call (unused).
    """
    execute_below = Javascript('IPython.notebook.execute_cells_below()')
    display(execute_below)
display(md(f'If this is correct, load the data and set this as the article to explore.'))
# `min_width` is a Layout attribute, not a Button trait: passed directly to
# Button it is discarded, so the minimum width is set through `layout`.
button = widgets.Button(
    description="Load data",
    button_style='info',
    layout=widgets.Layout(min_width='500px'),
)
# Clicking the button runs all cells below this one.
button.on_click(run_below)
display(button)
If this is correct, load the data and set this as the article to explore.
Button(button_style='info', description='Load data', style=ButtonStyle())
from IPython.display import display, Markdown as md
# Section B heading: rule, title, data-source note, and the selected page.
display(
    md("---"),
    md("# B. General Statistics "),
    md("Provided through the Xtools API (1)"),
    md(f"***Page: {the_page['title']}***"),
)
from IPython.display import display, Markdown as md
from external.xtools import XtoolsAPI, XtoolsDV
xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

# Fetch the article-level statistics for the selected page.
page_info = xtools_dv.get_page_info(the_page['title'])
# Keep only the 'value' entry of the assessment field for display.
page_info['assessment'] = page_info['assessment']['value']

# Turn the record into a one-column table with readable row labels.
# The page-views label reads 'pageviews_offset' before that row is dropped.
page_info = page_info.to_frame('value').rename(index={
    'project': 'Project name',
    'page': 'Page name',
    'watchers': 'Watchers (2)',
    'pageviews': f"Page Views (per {page_info['pageviews_offset']} days)",
    'revisions': 'Revisions',
    'editors': 'Editors',
    # Previously left unlabelled, so the raw field name showed in the table.
    'minor_edits': 'Minor edits',
    'author': 'Creator of the page',
    'created_at': 'Creation Date',
    'created_rev_id': 'Creation revision id',
    'modified_at': 'Last modified',
    'last_edit_id': 'Last revision id',
    'assessment': 'Content Assessment (3)',
}).drop(index = ['pageviews_offset', 'author_editcount', 'secs_since_last_edit','elapsed_time'])
display(page_info)

# Footnotes for the numbered markers used in the headings and row labels.
display(md("<sup>**(1)** *A community-built service for article statistics at xtools.wmflabs.org* **(2)** *Users that added this page to their watchlist.* **(3)** *See [Wikipedia Content Assessment](https://en.wikipedia.org/wiki/Wikipedia:Content_assessment)*</sup>"))
value | |
---|---|
Project name | en.wikipedia.org |
Page name | The Camp of the Saints |
Watchers (2) | 86 |
Page Views (per 30 days) | 23941 |
Revisions | 504 |
Editors | 237 |
minor_edits | 99 |
Creator of the page | Morning star |
Creation Date | 2005-03-22 |
Creation revision id | 12053908 |
Last modified | 2019-08-15 02:15 |
Last revision id | 910874720 |
Content Assessment (3) | C |
(1) A community-built service for article statistics at xtools.wmflabs.org (2) Users that added this page to their watchlist. (3) See Wikipedia Content Assessment
from IPython.display import display, Markdown as md
# Section C heading: rule, title, data-source note, and the selected page.
display(
    md("---"),
    md("# C. Page Views"),
    md("Provided through the Wikimedia API"),
    md(f"***Page: {the_page['title']}***"),
)
# Query request
from external.wikimedia import WikiMediaDV, WikiMediaAPI
wikimedia_api = WikiMediaAPI(project='en.wikipedia')
wikimedia_dv = WikiMediaDV(wikimedia_api)
# Page views of the selected article, requested at 'daily' granularity.
views = wikimedia_dv.get_pageviews(the_page['title'], 'daily')

# Visualization
from visualization.views_listener import ViewsListener
from ipywidgets import interact
from ipywidgets.widgets import Dropdown
listener = ViewsListener(views)

# "begin" offers the timestamps in their original order; "end" offers the
# same timestamps sorted in descending order.
begin_picker = Dropdown(options=views.timestamp)
end_picker = Dropdown(options=views.timestamp.sort_values(ascending=False))
granularity_picker = Dropdown(
    options=['Yearly', 'Monthly', 'Weekly', 'Daily'],
    value='Monthly',
)
interact(
    listener.listen,
    begin=begin_picker,
    end=end_picker,
    granularity=granularity_picker,
)
# The df_plotted keeps a reference to the plotted data above
# Summarise the plotted periods. 'Average views' previously used `min`, so
# it always repeated the minimum; it now uses the mean. Aggregations are
# named by string so pandas dispatches to its own Series methods.
listener.df_plotted['views'].agg({
    'Total views': 'sum',
    'Max views period': 'max',
    'Min views period': 'min',
    'Average views': 'mean',
}).to_frame('Value')
interactive(children=(Dropdown(description='begin', options=(Timestamp('2015-07-01 00:00:00'), Timestamp('2015…
Value | |
---|---|
Total views | 547154 |
Max views period | 76388 |
Min views period | 4082 |
Average views | 4082 |
Now that we have seen some general statistics of the article and the views it attracted, we will go on to look at what specific kinds of changes it was subject to over time, and by which editors.
Click below to go to the next notebook. You can later come back to this notebook and simply enter another article name to start the process over with that new article.
from utils.notebooks import get_next_notebook
from IPython.display import HTML
# Render a link that opens the next notebook of the series in a new tab.
next_notebook_link = f'<a href="{get_next_notebook()}" target="_blank">Go to next workbook</a>'
display(HTML(next_notebook_link))