%load_ext autoreload
%autoreload 2
%store -r the_page
%store -r the_editor
%store -r editor_inputname
%store -r calculator
%store -r editors_conflicts
# Fall back to the pickled sample data when any of the variables that the
# `%store -r` lines were supposed to restore is missing from the namespace.
if ('the_page' not in locals() or
        'the_editor' not in locals() or
        'editor_inputname' not in locals() or
        'calculator' not in locals() or
        'editors_conflicts' not in locals()):
    import pickle
    print("Loading default data...")
    # NOTE: unpickling can execute arbitrary code; these paths must only ever
    # point at trusted, locally generated files.
    # Every file is opened in a `with` block so its handle is closed as soon
    # as the object is loaded instead of staying open until garbage collection.
    with open("data/the_page.p", 'rb') as _pickled:
        the_page = pickle.load(_pickled)
    with open("data/the_editor.p", 'rb') as _pickled:
        the_editor = pickle.load(_pickled)
    with open("data/editor_inputname.p", 'rb') as _pickled:
        editor_inputname = pickle.load(_pickled)
    with open("data/calculator.p", 'rb') as _pickled:
        calculator = pickle.load(_pickled)
    with open("data/editors_conflicts.p", 'rb') as _pickled:
        editors_conflicts = pickle.load(_pickled)
# Bare expression: the resulting frame is not assigned to anything
# (a notebook would render it as the cell output).
the_editor.to_frame('value')

from IPython.display import display, Markdown as md

# Section A heading followed by the title of the page under analysis.
display(md("---"))
display(md("# A. Select an editor to analyze their conflicting editors"))
display(md("***Page: {}***".format(the_page['title'])))

# Introductory paragraph; the page title is interpolated once into the text.
display(md(
    "The table below presents the conflict score and other related metrics per editor "
    "(*editor_id* and *editor* column). Select one editor of the page \"{}\" to analyze "
    "the general Wikipedia metadata of the editor. At the end you can select created pages of the editor "
    "in order to restart the analysis in a different page:".format(the_page['title'])
))

# Legend for the metric columns; the empty first/last items reproduce the
# leading and trailing newline of the markdown text.
display(md("\n".join([
    "",
    "- **conflict_n**: the total number of conflicts",
    "- **conflict**: the sum of conflict scores of all actions (without division)",
    "- **actions**: the total number of actions performed by the editor",
    "- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions",
    "- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions",
    "",
])))
from visualization.conflicts_listener import ConflictsListener
from external.wikipedia import WikipediaDV, WikipediaAPI
# Markdown legend displayed below the conflict plot.
# The granularity list names all three choices offered by the granularity
# dropdown (Yearly, Monthly, Daily). The bold metric labels repeat the option
# strings of the metric dropdowns verbatim (including the "Elegible"
# spelling), so they are deliberately left untouched.
graph_description = """
In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily)
of the timeline (X-axis), and plot any of the following counts in the black and red lines:
- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions
- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)
- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions
- **Number of Conflicts**: the total number of conflicts
- **Total Elegible Actions**: the total number of elegible actions
- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions
- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions
- **Time per Conflict Action**: average time of conflict actions
- **Time per Elegible Action**: average time of elegible actions
"""
def display_conflict_score(eleg_actions):
    """Show the interactive conflict plot for a set of actions.

    Builds a ``ConflictsListener`` over ``eleg_actions`` and keeps it in the
    global ``listener``, displays the page-level conflict score reported by
    the global ``calculator``, and finally connects a date-range slider, a
    granularity dropdown and two metric dropdowns to ``listener.listen``
    through ``interact``.

    Args:
        eleg_actions: the actions to plot; its ``'rev_time'`` entry is used
            to build the date slider.
    """
    global listener
    from visualization.conflicts_listener import ConflictsListener

    listener = ConflictsListener(eleg_actions)

    # Names offered in the "black" and "red" dropdowns.
    metric_names = [
        'Conflict Score',
        'Absolute Conflict Score',
        'Conflict Ratio',
        'Number of Conflicts',
        'Total Elegible Actions',
        'Total Conflict Time',
        'Total Elegible Time',
        'Time per Conflict Action',
        'Time per Elegible Action',
    ]

    page_score = calculator.get_page_conflict_score()
    display(md(f'*Total Page conflict score: {page_score}*'))

    # Visualization helpers are imported right before they are needed.
    from utils.notebooks import get_date_slider_from_datetime
    from ipywidgets import interact
    from ipywidgets.widgets import Dropdown

    date_slider = get_date_slider_from_datetime(eleg_actions['rev_time'])
    granularity_picker = Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily')
    black_picker = Dropdown(options=metric_names, value='Conflict Score')
    # The red line is optional, hence the extra 'None' entry selected by default.
    red_picker = Dropdown(options=['None'] + metric_names, value='None')

    interact(
        listener.listen,
        _range=date_slider,
        granularity=granularity_picker,
        black=black_picker,
        red=red_picker,
    )
def select_editor(editor):
global editor_df
global the_editor
global editor_inputname
editor_inputname=editor
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
try:
the_editor = wikipedia_dv.get_editor(int(editor_inputname))
except:
the_editor = wikipedia_dv.get_editor(editor_inputname[2:])
with out:
%store the_editor
%store editor_inputname
clear_output()
display(md("### Current Selection:"))
if 'invalid' in the_editor:
display(f"The editor {editor_inputname} was not found, try a different editor")
else:
# display the data that will be passed to the next notebook
display(the_editor.to_frame('values'))
display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))
editor_df = calculator.elegible_actions[
calculator.elegible_actions['editor'] == editor_inputname].copy()
display_conflict_score(editor_df)
def on_selection_change(change):
try:
select_editor(qg_obj.get_selected_df().iloc[0].name)
except:
print('Problem parsing the name. Execute the cell again and try a different editor.')
# Editors table: a qgrid widget over `editors_conflicts`, with qgrid's
# 'maxVisibleRows' option set to 5.
import qgrid
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
# Call on_selection_change whenever the grid's `_selected_rows` trait changes.
qg_obj.observe(on_selection_change, names=['_selected_rows'])
from ipywidgets import Output
from IPython.display import display, clear_output, Markdown as md
display(md("### Select one editor (row) to continue the demo:"))
display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))
display(qg_obj)
# `out` is the output widget select_editor draws into (`with out:`), so it
# has to exist before select_editor is called below.
out = Output()
display(out)
display(md(graph_description))
# Render the analysis once for the editor already present in the namespace,
# before any row has been selected.
select_editor(editor_inputname)
from IPython.display import display, Markdown as md

# Section B: heading, data source and the currently selected editor.
display(md("---"))
display(md("# B. Basic editor information"))
display(md("Provided by Wikipedia"))
display(md("***Editor: {}***".format(the_editor['name'])))
display(md("The following is information about the editor directly available in Wikipedia."))

# Bare expression: the resulting frame is not assigned to anything
# (a notebook would render it as the cell output).
the_editor.to_frame('value')
from IPython.display import display, Markdown as md

# Section C: heading, data source and the currently selected editor.
display(md("---"))
display(md(f"# C. Modified pages of an editor"))
display(md(f"Provided through the Xtools API"))
display(md(f"***Editor: {the_editor['name']}***"))
display(md(f"""The following is some metadata about the creation and deletion
of pages in Wikipedia by the editor."""))

from external.xtools import XtoolsAPI, XtoolsDV
from IPython.display import display, clear_output

xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

try:
    editor_info = xtools_dv.get_modified_pages_counts_per_editor(the_editor['name'])
    display(editor_info.to_frame('value'))
except Exception:
    # Any ordinary failure of the lookup or of the rendering is presented as
    # "no modified pages". `Exception` instead of a bare `except` keeps
    # KeyboardInterrupt / SystemExit from being swallowed here.
    clear_output()
    display(md(f'**There are no modified pages by this editor.**'))
from IPython.display import display, Markdown as md

# Section D: heading, data source, selected editor and a legend for the
# columns of the created-pages table.
display(md("---"))
display(md(f"# D. Select one page created by an editor"))
display(md(f"Provided through the Xtools API"))
display(md(f"***Editor: {the_editor['name']}***"))
display(md(f"""The following table shows a list of created paged by the editor with some general
information about the page:
- *page_title*: title of the page
- *page_len*: an estimated amount of words in the page
- *rev_id*: the id of the last revision
- *rev_len*: the number of revisions made on that page
- *rev_timestamp*: the timestamp of the last revision (last modification)"""))

from external.wikipedia import WikipediaDV, WikipediaAPI
from IPython.display import HTML, display, clear_output
from utils.notebooks import get_notebook_by_number
from external.xtools import XtoolsAPI, XtoolsDV

xtools_api = XtoolsAPI(project = 'en.wikipedia.org')
xtools_dv = XtoolsDV(xtools_api)

try:
    created_pages = xtools_dv.get_created_pages_per_editor(the_editor['name'])
except Exception:
    # Any ordinary failure is presented as "no created pages"; `created_pages`
    # then stays unbound. `Exception` instead of a bare `except` keeps
    # KeyboardInterrupt / SystemExit from being swallowed here.
    # NOTE(review): the link to the next workbook is assumed to belong to this
    # fallback (indentation was lost in this export) — confirm.
    clear_output()
    display(md(f'**There are no created pages by this editor.**'))
    display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))
from visualization.conflicts_listener import ConflictsListener
def select_page(page):
global the_page
page_inputname=page
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
try:
the_page = wikipedia_dv.get_page(int(page_inputname))
except:
the_page = wikipedia_dv.get_page(page_inputname)
with out:
%store the_page
clear_output()
display(md("### Current Selection:"))
if 'invalid' in the_page:
display(f"The page {page_inputname} was not found, try a different page")
else:
# display the data that will be passed to the next notebook
display(the_page.to_frame('values'))
display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))
def on_selection_change(change):
    """Grid callback: select the page of the first selected row.

    The ``page_title`` of the first selected row of ``qg_obj`` is handed to
    ``select_page``. Any ordinary error is reported with a short message and
    a link to the next workbook instead of being raised.

    Args:
        change: change notification supplied by ``observe``; not used.
    """
    try:
        select_page(qg_obj.get_selected_df().iloc[0].page_title)
    except Exception:
        # Best-effort UI callback: report the problem, but let
        # KeyboardInterrupt / SystemExit propagate (a bare `except` hid them).
        # NOTE(review): the link is assumed to belong to this fallback
        # (indentation was lost in this export) — confirm.
        print('Problem parsing the name. Execute the cell again and try a different page.')
        display(HTML(f'<a href="{get_notebook_by_number(1)}" target="_blank">Go to next workbook</a>'))
# Build and show the created-pages grid only when `created_pages` exists.
# `qg_obj` is already bound by the editors grid earlier in this file, so
# testing for its existence cannot tell whether a pages grid was built;
# everything that needs the pages grid therefore lives in this one branch.
if 'created_pages' in locals():
    import qgrid
    from ipywidgets import Output
    from IPython.display import display, clear_output, Markdown as md

    qg_obj = qgrid.show_grid(created_pages[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']])
    # Call on_selection_change whenever the grid's `_selected_rows` trait changes.
    qg_obj.observe(on_selection_change, names=['_selected_rows'])

    display(md("### Select one page row for the next notebook:"))
    display(qg_obj)
    # Fresh output widget that select_page draws into (`with out:`).
    out = Output()
    display(out)