#!/usr/bin/env python
# coding: utf-8

# In[ ]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('store', '-r the_page')

# Fall back to the pickled default page when %store did not restore one.
if 'the_page' not in locals():
    import pickle
    print("Loading default data...")
    # NOTE(review): pickle executes arbitrary code on load; only ship a
    # trusted data/the_page.p with the notebooks.
    with open("data/the_page.p", 'rb') as default_page_file:
        the_page = pickle.load(default_page_file)


# # Welcome!
#
# You have just opened a collection of notebooks that lets you inspect the evolution of the revision history of a Wikipedia article, up to now (from the English language edition). It also allows you to highlight **article- or word-specific conflicts as well as the productivity of any given editor.**
#
# Specifically, for the notebooks after this initial one, it interfaces with the API of a specialized service called [WikiWho](https://www.wikiwho.net), which provides fine-grained change information about the tokens (words) in an article.
#
# It is written in a way that you can **explore it like a Web app, without interacting with the code behind it**, or - if you choose to - click on "edit app" in the Jupyter navigation bar and play around with the code yourself.
#
# The default introduction example is the article "The Camp of the Saints" (a novel), which we recommend to start with. You can enter/search an article of your choice and explore it as well.
#
# Let's first get live data of some general statistics from Wikipedia's own API and a service called Xtools:

# In[ ]:


from IPython.display import display, Markdown as md
display(md("---"))
display(md("# A. Basic Info from Wikipedia"))
display(md("***Search for an article on the English Wikipedia***"))


# In[ ]:


from ipywidgets import widgets, Output
from IPython.display import display, clear_output
from external.wikipedia import WikipediaDV, WikipediaAPI

wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))


# the method that listens to the click event
def on_button_clicked(b):
    """Search Wikipedia for the entered title and show the selected page.

    Reads the term from the ``searchTerm`` text widget, looks it up through
    ``wikipedia_dv``, rebinds the module-level ``the_page`` to the result and
    persists it with the ``%store`` magic so later notebooks can restore it.
    Any failure along the way is reported as "not found" in the output widget.

    Args:
        b: The button that fired the event (unused).
    """
    global the_page

    # use the out widget so the output is overwritten when two or more
    # searches are performed
    with out:
        try:
            # query wikipedia
            search_result = wikipedia_dv.search_page(searchTerm.value)
            the_page = wikipedia_dv.get_page(search_result)
            get_ipython().run_line_magic('store', 'the_page')
            clear_output()
            display(the_page.to_frame('value'))
            display(md('You selected:'))
            display(the_page['title'])

        except Exception:
            # Best effort: every lookup/display error is shown as "not found",
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            clear_output()
            display(md(f'The page title *"{searchTerm.value}"* was not found'))


# by default display the last search
try:
    searchTerm = widgets.Text(the_page['title'], description='Page title:')
except Exception:
    # the_page is missing or has no usable title: use the introduction example
    searchTerm = widgets.Text("The Camp of the Saints", description='Page title:')

# create and display the button
button = widgets.Button(description="Search")
example = md("e.g. *The Camp of the Saints*")
display(searchTerm, example, button)

# the output widget is used to remove the output after the search field
out = Output()
display(out)

# set the event
button.on_click(on_button_clicked)

# trigger the event with the default value
on_button_clicked(button)


# In[ ]:


from ipywidgets import widgets
from IPython.display import display, Javascript


def run_below(ev):
    """Ask the notebook front end to execute every cell below this one.

    Args:
        ev: The button that fired the event (unused).
    """
    display(Javascript('IPython.notebook.execute_cells_below()'))


display(md('If this is correct, load the data and set this as the article to explore.'))
button = widgets.Button(description="Load data", button_style='info', min_width=500)
button.on_click(run_below)
display(button)


# In[ ]:


from IPython.display import display, Markdown as md
display(md("---"))
display(md("# B. General Statistics "))
display(md("Provided through the Xtools API (1)"))
display(md(f"***Page: {the_page['title']}***"))


# In[ ]:


from IPython.display import display, Markdown as md
from external.xtools import XtoolsAPI, XtoolsDV

xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)
page_info = xtools_dv.get_page_info(the_page['title'])

# Replace the nested assessment record with its 'value' entry for display.
page_info['assessment'] = page_info['assessment']['value']

# Human-readable row labels; the f-string is evaluated before page_info is
# rebound, so it still reads 'pageviews_offset' from the raw record.
page_info = page_info.to_frame('value').rename(index={
    'project': 'Project name',
    'page': 'Page name',
    'watchers': 'Watchers (2)',
    'pageviews': f"Page Views (per {page_info['pageviews_offset']} days)",
    'revisions': 'Revisions',
    'editors': 'Editors',
    'author': 'Creator of the page',
    'created_at': 'Creation Date',
    'created_rev_id': 'Creation revision id',
    'modified_at': 'Last modified',
    'last_edit_id': 'Last revision id',
    'assessment': 'Content Assessment (3)',
}).drop(index = ['pageviews_offset', 'author_editcount', 'secs_since_last_edit','elapsed_time'])

display(page_info)
display(md("**(1)** *A community-built service for article statistics at xtools.wmflabs.org* **(2)** *Users that added this page to their watchlist.* **(3)** *See [Wikipedia Content Assessment](https://en.wikipedia.org/wiki/Wikipedia:Content_assessment)*"))


# In[ ]:


from IPython.display import display, Markdown as md
display(md("---"))
display(md("# C. Page Views"))
display(md("Provided through the Wikimedia API"))
display(md(f"***Page: {the_page['title']}***"))


# In[ ]:


# Query request
from external.wikimedia import WikiMediaDV, WikiMediaAPI
wikimedia_api = WikiMediaAPI(project='en.wikipedia')
wikimedia_dv = WikiMediaDV(wikimedia_api)
views = wikimedia_dv.get_pageviews(the_page['title'], 'daily')

# Visualization
from visualization.views_listener import ViewsListener
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

listener = ViewsListener(views)
interact(listener.listen,
         begin=Dropdown(options=views.timestamp),
         end=Dropdown(options=views.timestamp.sort_values(ascending=False)),
         granularity=Dropdown(options=['Yearly', 'Monthly', 'Weekly', 'Daily'], value='Monthly'))

# The df_plotted keeps a reference to the plotted data above
listener.df_plotted['views'].agg({
    'Total views': 'sum',
    'Max views period': 'max',
    'Min views period': 'min',
    # was `min`, which reported the minimum under the "Average" label
    'Average views': 'mean',
}).to_frame('Value')


# After we have now seen some general statistics of the article and the views it attracted, we will go on to take a look at what specific kinds of changes by which editors it was subject to over time.
#
# Click below to go to the next notebook. You can later come back to this notebook and simply enter another article name to start the process over with that new article.

# In[ ]:


from utils.notebooks import get_next_notebook
from IPython.display import HTML
# NOTE(review): get_next_notebook is imported but never used and the HTML below
# is plain text; presumably this was a link to the next notebook whose markup
# was lost. Restore the anchor once the intended target is confirmed.
display(HTML('Go to next workbook'))