# Reload edited project modules automatically before each execution
# (IPython ``autoreload`` extension, mode 2).
%load_ext autoreload
%autoreload 2
# Restore the variables persisted with ``%store`` (IPython storemagic), if any.
# A variable that was never stored is simply left undefined.
%store -r the_page
%store -r the_editor
%store -r agg_actions
%store -r editor_inputname
%store -r calculator
%store -r editors_conflicts
# Fall back to the bundled pickles when at least one expected variable is
# missing from the namespace.  All six are reloaded together in that case.
if ('the_page' not in locals() or
        'the_editor' not in locals() or
        'agg_actions' not in locals() or
        'editor_inputname' not in locals() or
        'calculator' not in locals() or
        'editors_conflicts' not in locals()):
    import pickle

    def _load_default(name):
        """Return the object unpickled from ``data/<name>.p``.

        The file is opened in a ``with`` block so the handle is closed as soon
        as the object has been read.
        """
        # NOTE(security): unpickling can execute arbitrary code; these files
        # must come from a trusted source.
        with open(f"data/{name}.p", 'rb') as pickled:
            return pickle.load(pickled)

    print("Loading default data...")
    the_page = _load_default('the_page')
    the_editor = _load_default('the_editor')
    agg_actions = _load_default('agg_actions')
    editor_inputname = _load_default('editor_inputname')
    calculator = _load_default('calculator')
    editors_conflicts = _load_default('editors_conflicts')
from IPython.display import display, Markdown as md

# Markdown fragments of the section A header, rendered from top to bottom.
_section_a_intro = (
    "---",
    "# A. Select an editor to analyze their conflicting editors",
    f"***Page: {the_page['title']}***",
    """The table below presents the conflict score and other related metrics per editor
(*editor_id* and *editor* column). Select one editor to analyze the editors that enter into
conflict with her:
- **conflict_n**: the total number of conflicts
- **conflict**: the sum of conflict scores of all actions (without division)
- **actions**: the total number of actions performed by the editor
- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions
- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions
""",
)
for _fragment in _section_a_intro:
    display(md(_fragment))
from external.wikipedia import WikipediaDV, WikipediaAPI
# Markdown legend for the conflict-score graph.  The granularities listed here
# mirror the options of the graph's granularity dropdown (Yearly, Monthly,
# Daily); the metric labels are the dropdown entries and are kept verbatim.
graph_description = """
In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily)
of the timeline (X-axis), and plot any of the following counts in the black and red lines:
- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions
- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)
- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions
- **Number of Conflicts**: the total number of conflicts
- **Total Elegible Actions**: the total number of elegible actions
- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions
- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions
- **Time per Conflict Action**: average time of conflict actions
- **Time per Elegible Action**: average time of elegible actions
"""
def display_conflict_score(eleg_actions):
    """Show the interactive conflict-score timeline for ``eleg_actions``.

    A ``ConflictCalculatorListener`` is created over the given actions and
    stored in the global ``listener``.  The page-wide conflict score reported
    by the global ``calculator`` is printed as Markdown, and the listener's
    ``listen`` method is bound with ``interact`` to a date-range slider and
    three dropdowns (granularity, black line, red line).

    Args:
        eleg_actions: table of actions to plot; its ``rev_time`` column drives
            the date slider (presumably a pandas DataFrame -- confirm with the
            caller).
    """
    global listener

    from visualization.calculator_listener import ConflictCalculatorListener
    from utils.notebooks import get_date_slider_from_datetime
    from ipywidgets import interact
    from ipywidgets.widgets import Dropdown

    listener = ConflictCalculatorListener(eleg_actions)

    # Labels offered for the black and red lines of the plot.
    plottable = [
        'Conflict Score',
        'Absolute Conflict Score',
        'Conflict Ratio',
        'Number of Conflicts',
        'Total Elegible Actions',
        'Total Conflict Time',
        'Total Elegible Time',
        'Time per Conflict Action',
        'Time per Elegible Action',
    ]

    page_score = calculator.get_page_conflict_score()
    display(md(f'*Total Page conflict score: {page_score}*'))

    # Insertion order of this dict is the order in which the controls appear.
    controls = {
        '_range': get_date_slider_from_datetime(eleg_actions['rev_time']),
        'granularity': Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),
        'black': Dropdown(options=plottable, value='Conflict Score'),
        'red': Dropdown(options=['None'] + plottable, value='None'),
    }
    interact(listener.listen, **controls)
def select_editor(editor):
global editor_df
global the_editor
global editor_inputname
editor_inputname=editor
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
try:
the_editor = wikipedia_dv.get_editor(int(editor_inputname))
except:
the_editor = wikipedia_dv.get_editor(editor_inputname[2:])
with out:
%store the_editor
%store editor_inputname
clear_output()
display(md("### Current Selection:"))
if 'invalid' in the_editor:
display(f"The editor {editor_inputname} was not found, try a different editor")
else:
# display the data that will be passed to the next notebook
display(the_editor.to_frame('values'))
display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))
editor_df = calculator.elegible_actions[
calculator.elegible_actions['editor'] == str(editor_inputname)].copy()
display_conflict_score(editor_df)
def on_selection_change(change):
    """Observer for the grid's selection: select the editor of the first selected row.

    The index label (``.name``) of the first row returned by
    ``qg_obj.get_selected_df()`` is passed to ``select_editor``.  Any ordinary
    error, including an empty selection, is reported with a short hint instead
    of a traceback.

    Args:
        change: change notification passed by the observer mechanism; unused.
    """
    try:
        select_editor(qg_obj.get_selected_df().iloc[0].name)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate.
        print('Problem parsing the name. Execute the cell again and try a different editor.')
import qgrid
from ipywidgets import Output
from IPython.display import display, clear_output, Markdown as md

# Interactive grid of per-editor conflict metrics; selecting a row triggers
# on_selection_change().
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
qg_obj.observe(on_selection_change, names=['_selected_rows'])

for _note in (
        "### Select one editor (row) to continue the demo:",
        '**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*',
):
    display(md(_note))
display(qg_obj)

# Output area that select_editor() clears and redraws on every selection.
out = Output()
display(out)
display(md(graph_description))

# Render the initial selection (restored or default editor).
select_editor(editor_inputname)
from ipywidgets import widgets
from IPython.display import display, Javascript


def run_below(ev):
    """Button callback: have the notebook front end execute the cells below.

    Args:
        ev: argument supplied by ``Button.on_click``; unused.
    """
    display(Javascript('IPython.notebook.execute_cells_below()'))


# Sizing has to go through ``layout``: ``min_width`` is not a Button trait, so
# passing it as a plain keyword had no effect on the rendered widget.
button = widgets.Button(
    description="Refresh Notebook",
    button_style='info',
    layout={'min_width': '500px'},
)
button.on_click(run_below)
display(button)
from IPython.display import display, Markdown as md
from external.wikipedia import WikipediaDV, WikipediaAPI

# Section B header and explanation.
for _fragment in (
        "---",
        "# B. Detecting conflicting editors",
        f"***Page: {the_page['title']}***",
        f"***Editor: {the_editor['name']}***",
        f"""If editor {the_editor['name']} undo and action of editor B, editor B is called
a conflicting editor. The following table shows the conflicting editors of {the_editor['name']}
including their conflict score and other related metrics (see section A)."""):
    display(md(_fragment))

# NOTE(review): the two bare expressions below look like leftover inspection
# output from notebook cells; they are kept so evaluation stays unchanged.
editor_inputname
calculator.elegible.shift(-1)['editor'].dtype

# Actions that conflict with the selected editor, and the editors behind them.
conflicting_actions = calculator.get_conflicting_actions(str(editor_inputname))
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
conflicting_editors = conflicting_actions['editor'].unique().tolist()

# Ids prefixed with '0|' are not integers and are left out of the lookup.
_numeric_ids = [int(uid) for uid in conflicting_editors if not uid.startswith('0|')]
editors = wikipedia_dv.get_editors(_numeric_ids)
editors['userid'] = editors['userid'].astype('str')

full_editors_conflicts = calculator.get_conflict_score_per_editor()

# Final expression: the conflicting editors joined with their conflict metrics.
_editor_columns = editors[['userid', 'name', 'registration']]
_editor_columns.merge(
    full_editors_conflicts,
    right_index=True,
    left_on='userid',
    how='left',
).set_index('userid')
from IPython.display import display, Markdown as md
from visualization.actions_listener import ActionsListener
from ipywidgets import interact
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets.widgets import Dropdown

# Section C header and legend of the plottable counts.
for _fragment in (
        "---",
        "# C. Activity of conflicting editors",
        f"***Page: {the_page['title']}***",
        f"***Editor: {the_editor['name']}***",
        """In the following graph you can select the conflicting *editor*, *date range* and
*granularity* (yearly, montly) of the timeline (X-axis), and plot any of the follow counts in
the black, red, blue and green lines:
- **adds**: number of first-time insertions
- **adds_surv_48h**: number of insertions for the first time that survived at least 48 hours
- **adds_persistent**: number of insertions for the first time that survived until, at least, the end of the month
- **adds_stopword_count**: number of insertions that were stop words
- **dels**: number of deletions
- **dels_surv_48h**: number of deletions that were not resinserted in the next 48 hours
- **dels_persistent**: number of deletions that were not resinserted until, at least, the end of the month
- **dels_stopword_count**: number of deletions that were stop words
- **reins**: number of reinsertions
- **reins_surv_48h**: number of reinsertionsthat survived at least 48 hours
- **reins_persistent**: number of reinsertionsthat survived until the end of the month
- **reins_stopword_count**: number of reinsertionsthat were stop words
"""):
    display(md(_fragment))

# Join the conflicting editors with their aggregated actions; the join key is
# cast to int before merging on ``editor_id``.
editors['userid'] = editors['userid'].astype('int')
_editor_columns = editors[['userid', 'name', 'registration']]
conf_editor_agg_actions = _editor_columns.merge(
    agg_actions,
    left_on='userid',
    right_on='editor_id',
    how='left',
).set_index('userid')

# Listener
listener = ActionsListener(conf_editor_agg_actions)

# Plottable columns: the 'total…' range followed by the 'adds…reins…' range.
_total_columns = conf_editor_agg_actions.loc[:, 'total':'total_stopword_count'].columns
_detail_columns = conf_editor_agg_actions.loc[:, 'adds':'reins_stopword_count'].columns
actions = _total_columns.append(_detail_columns).values.tolist()

# Visualization (dict order is the order in which the controls appear).
_optional_actions = ['None'] + actions
_controls = {
    '_range': get_date_slider_from_datetime(conf_editor_agg_actions['year_month']),
    'editor': Dropdown(options=['All'] + editors['name'].values.tolist(), value='All'),
    'granularity': Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),
    'black': Dropdown(options=actions, value='total'),
    'red': Dropdown(options=_optional_actions, value='total_surv_48h'),
    'green': Dropdown(options=_optional_actions, value='None'),
    'blue': Dropdown(options=_optional_actions, value='None'),
}
interact(listener.listen, **_controls)
from IPython.display import display, Markdown as md
display(md("---"))
display(md("# D. Tokens of conflicting editors"))
display(md(f"***Page: {the_page['title']}***"))
display(md(f"***Editor: {the_editor['name']}***"))
# The source names listed here must match the keys of ``sources`` below, which
# are what the *source* dropdown offers.
display(md(""" The WordCloud displays the most common token strings (words) that a particular editor
inserted or deleted and that enter into conflict with other editors. The size of the token string in
the WordCloud indicates frequency of actions.
In the controls, you can select the conflicting *editor*, the *date range*, the type of *action*
(insertion or deletion), and the *source*. The *source* can be any of the following:
- **Only Conflicts**: use only the actions that are in conflict
- **Elegible Actions**: use all actions that involve an undo
"""))

# Join the conflicting editors with the conflicting actions; the join key is
# cast to str before merging on the ``editor`` column.
editors['userid'] = editors['userid'].astype('str')
editor_conflicts = editors[['userid', 'name', 'registration']].merge(
    conflicting_actions,
    left_on='userid', right_on='editor', how='left').set_index('userid')

# 'Only Conflicts' keeps just the rows that carry a conflict value.
sources = {
    'Elegible Actions': editor_conflicts,
    'Only Conflicts': editor_conflicts[editor_conflicts['conflict'].notnull()],
}

# listener
from visualization.wordcloud_listener import WCListener
listener = WCListener(sources)

# visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown
interact(listener.listen,
         _range=get_date_slider_from_datetime(editor_conflicts['rev_time']),
         editor=Dropdown(options=['All'] + editors['name'].values.tolist(),
                         value='All', layout={'width': '400px'}),
         source=Dropdown(options=list(listener.sources.keys()),
                         value='Elegible Actions', layout={'width': '400px'}),
         action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'],
                         value='Both', layout={'width': '400px'}))
from IPython.display import HTML
from utils.notebooks import get_next_notebook

# Link to the next notebook of the demo, opened in a new browser tab.
_next_notebook_url = get_next_notebook()
_next_link = HTML(f'<a href="{_next_notebook_url}" target="_blank">Go to next workbook</a>')
display(_next_link)