%load_ext autoreload
%autoreload 2
%store -r the_page
if 'the_page' not in locals():
import pickle
print("Loading default data...")
the_page = pickle.load(open("data/the_page.p",'rb'))
Let's take a look at how the revision history of an article in the English edition of Wikipedia has evolved up to now. Example: the novel "The Camp of the Saints". We can get live data from various sources:
from IPython.display import display, Markdown as md

# Render the header of section A: a rule, the title and the search prompt.
for _header_line in (
    "---",
    "# A. Basic Info from Wikipedia",
    "***Search for a Wikipedia Page***",
):
    display(md(_header_line))
from ipywidgets import widgets, Output
from IPython.display import display, clear_output
from external.wikipedia import WikipediaDV, WikipediaAPI
# Build the WikipediaDV helper on top of an API client for en.wikipedia.org.
wikipedia_api = WikipediaAPI(domain='en.wikipedia.org')
wikipedia_dv = WikipediaDV(wikipedia_api)
# the method that listens to the click event
# the method that listens to the click event
def on_button_clicked(b):
global the_page
# use the out widget so the output is overwritten when two or more
# searches are performed
with out:
try:
# query wikipedia
search_result = wikipedia_dv.search_page(searchTerm.value)
the_page = wikipedia_dv.get_page(search_result)
%store the_page
clear_output()
display(the_page.to_frame('value'))
except:
clear_output()
display(md(f'The page title *"{searchTerm.value}"* was not found'))
# by default display the last search
try:
    searchTerm = widgets.Text(the_page['title'], description='Page title:')
except Exception:
    # `the_page` is missing or its title cannot be used: fall back to the
    # example page. Only regular errors are caught, so KeyboardInterrupt
    # and SystemExit still propagate.
    searchTerm = widgets.Text("The Camp of the Saints", description='Page title:')

# create and display the button
button = widgets.Button(description="Search")
example = md("e.g. *The Camp of the Saints*")
display(searchTerm, example, button)

# the output widget is used to remove the output after the search field
out = Output()
display(out)

# set the event
button.on_click(on_button_clicked)

# trigger the event with the default value
on_button_clicked(button)
from ipywidgets import widgets
from IPython.display import display, Javascript
def run_below(ev):
    """Ask the notebook front end to execute every cell below this one.

    Args:
        ev: The click event argument passed by the button (unused).
    """
    rerun_script = Javascript('IPython.notebook.execute_cells_below()')
    display(rerun_script)


# Button that re-runs the rest of the notebook, e.g. after a new search.
button = widgets.Button(
    description="Refresh Notebook",
    button_style='info',
    min_width=500,
)
button.on_click(run_below)
display(button)
from IPython.display import display, Markdown as md

# Render the header of section B: a rule, the title and the data source.
for _header_line in (
    "---",
    "# B. General Statistics ",
    "Provided through the Xtools API",
):
    display(md(_header_line))

# Show which page the statistics below refer to.
display(md(f"***Page: {the_page['title']}***"))
from external.xtools import XtoolsAPI, XtoolsDV

# Create the Xtools API client for en.wikipedia.org and the XtoolsDV helper on top of it.
xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

# Fetch the Xtools info for the currently selected page title.
page_info = xtools_dv.get_page_info(the_page['title'])

# Last expression of the cell: rendered by the notebook as a one-column
# frame named 'value'.
page_info.to_frame('value')
# Unclear xtools fields, delete or describe better: author (= creator of the page),
# pageviews_offset (delete), watchers (= users that have added this page to their watchlist),
# author_editcount (delete), secs_since_last_edit (delete), elapsed_time (?, delete?)
# What is "assessment"? --> retain "value" if it means article quality and category
from IPython.display import display, Markdown as md

# Render the header of section C: a rule, the title and the data source.
for _header_line in (
    "---",
    "# C. Page Views",
    "Provided by Wikimedia page view API (only available since 2015)",
):
    display(md(_header_line))

# Show which page the views below refer to.
display(md(f"***Page: {the_page['title']}***"))
# TODO: the page-views graph labels its y-axis 'actions'; it should be "views".

# Query request
from external.wikimedia import WikiMediaDV, WikiMediaAPI
wikimedia_api = WikiMediaAPI(project='en.wikipedia')
wikimedia_dv = WikiMediaDV(wikimedia_api)
views = wikimedia_dv.get_pageviews(the_page['title'], 'daily')

# Visualization
from visualization.views_listener import ViewsListener
from ipywidgets import interact
from ipywidgets.widgets import Dropdown
listener = ViewsListener(views)

# One dropdown per argument of `listener.listen`. The begin options use the
# timestamps as returned, the end options use them sorted in descending order.
begin_dropdown = Dropdown(options=views.timestamp)
end_dropdown = Dropdown(options=views.timestamp.sort_values(ascending=False))
granularity_dropdown = Dropdown(
    options=['Yearly', 'Monthly', 'Weekly', 'Daily'],
    value='Monthly',
)

# Wire the dropdowns to the listener.
interact(
    listener.listen,
    begin=begin_dropdown,
    end=end_dropdown,
    granularity=granularity_dropdown,
)
# The df_plotted keeps a reference to the plotted data above.
# Summarise the plotted 'views' column; the dict keys become the row labels
# of the resulting one-column frame.
listener.df_plotted['views'].agg({
    'Total views': 'sum',
    'Max views period': 'max',
    'Min views period': 'min',
    # Fixed: this row previously used `min`, so the "average" shown was
    # actually the minimum.
    'Average views': 'mean',
}).to_frame('Value')
from utils.notebooks import get_next_notebook
from IPython.display import HTML

# Link that opens the next notebook of the series in a new browser tab.
next_notebook_url = get_next_notebook()
display(HTML(f'<a href="{next_notebook_url}" target="_blank">Go to next workbook</a>'))