%load_ext autoreload
%autoreload 2
%store -r the_page
%store -r agg_actions
%store -r calculator
if ('the_page' not in locals() or
'agg_actions' not in locals() or
'calculator' not in locals()):
import pickle
print("Loading default data...")
the_page = pickle.load(open("data/the_page.p",'rb'))
agg_actions = pickle.load(open("data/agg_actions.p",'rb'))
calculator = pickle.load(open("data/calculator.p",'rb'))
from IPython.display import display, Markdown as md

# Header of section A: horizontal rule, section title, then the page title.
display(md("---"))
display(md("# A. Select a conflicting editor"))
_page_heading = f"***Page: {the_page['title']}***"
display(md(_page_heading))
# TODO: an explanation of the conflict-related terms is missing here, just as in notebook 2.
# TODO: user 'MelbourneStar' in the 'Barack_Obama' article shows weird behavior in the first two line graphs:
# the total actions (per month) in the first graph are higher than in the second one (see December 2014).
# Also, in the first graph, why are total and conflict_score shown when they are 0?
# And how can there be a tag cloud with conflicted words if the conflict score is 0 all the time?
from external.wikipedia import WikipediaDV, WikipediaAPI

# Conflict score per editor and the eligible actions, both taken from the calculator.
editors_conflicts = calculator.get_conflict_score_per_editor()
elegible_actions = calculator.elegible_actions

# Fetch editor details from Wikipedia for every index entry that is purely numeric.
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
_numeric_ids = [int(editor_id) for editor_id in editors_conflicts.index
                if editor_id.isdigit()]
editors = wikipedia_dv.get_editors(_numeric_ids)

# The conflict table is joined on its index, so the user ids are cast to strings first.
editors['userid'] = editors['userid'].astype('str')

# Left join: keep every fetched editor and attach its conflict columns.
_editor_details = editors[['userid', 'name', 'registration']]
_merged = _editor_details.merge(editors_conflicts,
                                how='left',
                                left_on='userid',
                                right_index=True)
editors_conflicts = _merged.set_index('userid')
from visualization.conflicts_listener import ConflictsListener
def display_conflict_score(eleg_actions):
    """Display the page conflict score and an interactive conflict plot.

    `eleg_actions` is passed to `ConflictsListener`; its 'rev_time' column
    feeds the date-range slider. The function returns nothing: everything is
    rendered through `display` and `interact`.
    """
    from visualization.conflicts_listener import ConflictsListener
    from utils.notebooks import get_date_slider_from_datetime
    from ipywidgets import interact
    from ipywidgets.widgets import Dropdown

    conflicts_listener = ConflictsListener(eleg_actions)

    # Metrics offered in the "black" and "red" trace dropdowns.
    metrics = [
        'Conflict Score',
        'Conflict Ratio',
        'Total Conflicts',
        'Total Elegible Actions',
        'Total Actions',
        'Total Time',
        'Time per Elegible Action',
    ]

    page_score = calculator.get_page_conflict_score()
    display(md(f'**Page conflict score: {page_score}**'))

    # Widgets are built in the same order as the keyword arguments they feed.
    date_slider = get_date_slider_from_datetime(eleg_actions['rev_time'])
    granularity_menu = Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily')
    black_menu = Dropdown(options=metrics, value='Conflict Score')
    red_menu = Dropdown(options=['None'] + metrics, value='None')

    interact(conflicts_listener.listen,
             _range=date_slider,
             granularity=granularity_menu,
             black=black_menu,
             red=red_menu)
def select_editor(editor):
global the_editor
global editor_inputname
editor_inputname=editor
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
try:
the_editor = wikipedia_dv.get_editor(int(editor_inputname))
except:
the_editor = wikipedia_dv.get_editor(editor_inputname[2:])
with out:
%store the_editor
%store editor_inputname
clear_output()
display(md("### Current Selection:"))
if 'invalid' in the_editor:
display(f"The editor {editor_inputname} was not found, try a different editor")
else:
# display the data that will be passed to the next notebook
display(the_editor.to_frame('values'))
display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))
display_conflict_score(elegible_actions[elegible_actions['editor'] == editor_inputname].copy())
def on_selection_change(change):
    """Grid observer: select the editor of the first selected row.

    `change` is the notification object passed by `observe`; it is not used.
    Any error raised while selecting is reported with a printed message
    instead of propagating.
    """
    try:
        select_editor(qg_obj.get_selected_df().iloc[0].name)
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    except Exception:
        print('Problem parsing the name. Execute the cell again and try a different editor.')
import qgrid
from ipywidgets import Output
from IPython.display import display, clear_output, Markdown as md

# Editors table, limited to five visible rows; row selection triggers on_selection_change.
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
qg_obj.observe(on_selection_change, names=['_selected_rows'])

display(md("### Select one editor row for the next notebook:"))
display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))
display(qg_obj)

# Output area that select_editor() draws into.
out = Output()
display(out)

# Pre-select the first editor whose id does not start with '0|'.
_default_editor = next(
    (editor_id for editor_id in editors_conflicts.index if editor_id[:2] != '0|'),
    None,
)
if _default_editor is not None:
    select_editor(_default_editor)
from ipywidgets import widgets
from IPython.display import display, Javascript


def run_below(ev):
    """Button callback: ask the notebook front end to execute the cells below.

    `ev` is the button instance passed by `on_click`; it is not used.
    NOTE(review): `IPython.notebook` looks like the classic-notebook
    JavaScript API -- confirm it is available in the target front end.
    """
    display(Javascript('IPython.notebook.execute_cells_below()'))


# `min_width` is a Layout property, not a Button trait: passed directly to
# Button() it is an unrecognised keyword and has no effect on the widget.
button = widgets.Button(description="Refresh Notebook",
                        button_style='info',
                        layout=widgets.Layout(min_width='500px'))
button.on_click(run_below)
display(button)
from IPython.display import display, Markdown as md
from visualization.actions_listener import ActionsListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

# Header of section B.
display(md("---"))
display(md("# B. History of editor on a page"))
display(md(f"***Page: {the_page['title']}***"))
display(md(f"***Editor: {the_editor['name']}***"))

# Aggregated actions restricted to the selected editor.
_is_selected_editor = agg_actions['editor_id'] == the_editor.userid
editor_agg_actions = agg_actions[_is_selected_editor]

listener = ActionsListener(editor_agg_actions)

# Plottable columns: the 'total'..'total_stopword_count' slice followed by
# the 'adds'..'reins_stopword_count' slice.
_total_columns = editor_agg_actions.loc[:, 'total':'total_stopword_count'].columns
_detail_columns = editor_agg_actions.loc[:, 'adds':'reins_stopword_count'].columns
actions = _total_columns.append(_detail_columns).values.tolist()

# The date slider spans the dates of all aggregated actions, not only the editor's.
interact(
    listener.listen,
    _range=get_date_slider_from_datetime(agg_actions['year_month']),
    editor=fixed('All'),
    granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),
    black=Dropdown(options=actions, value='total'),
    red=Dropdown(options=['None', *actions], value='total_surv_48h'),
    green=Dropdown(options=['None', *actions], value='None'),
    blue=Dropdown(options=['None', *actions], value='None'),
)
from IPython.display import display, Markdown as md
from visualization.wordcloud_listener import WCListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

# Header of section C.
display(md("---"))
display(md("# C. Tokens that enter into conflict with other editors"))
display(md(f"***Page: {the_page['title']}***"))
display(md(f"***Editor: {the_editor['name']}***"))

# One data source per dropdown entry, each restricted to the selected editor.
_all_actions = calculator.all_actions
_elegible_actions = calculator.elegible_actions
_conflicts = calculator.conflicts
sources = {
    'All actions': _all_actions[_all_actions['editor'] == editor_inputname],
    'Elegible Actions': _elegible_actions[_elegible_actions['editor'] == editor_inputname],
    'Only Conflicts': _conflicts[_conflicts['editor'] == editor_inputname],
}

listener = WCListener(sources)

# The date slider covers the revision times of all actions on the page.
_date_slider = get_date_slider_from_datetime(calculator.all_actions['rev_time'])
_source_menu = Dropdown(options=list(listener.sources.keys()), value='Only Conflicts')
_action_menu = Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both')
interact(listener.listen,
         _range=_date_slider,
         source=_source_menu,
         action=_action_menu,
         editor=fixed('All'))
from IPython.display import display, Markdown as md
from visualization.owned_listener import OwnedListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

# Header of section D.
display(md("---"))
display(md("# D. Tokens on the page owned by the editor"))
display(md(f"***Page: {the_page['title']}***"))
display(md(f"***Editor: {the_editor['name']}***"))

# The listener receives all actions of the page plus the selected editor's identifier.
owned = calculator.all_actions
listener = OwnedListener(owned, editor_inputname)

traces = ['Tokens Owned', 'Tokens Owned (%)']

_date_slider = get_date_slider_from_datetime(owned['rev_time'])
_granularity_menu = Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily')
_trace_menu = Dropdown(options=traces, value='Tokens Owned (%)')
interact(listener.listen,
         _range=_date_slider,
         granularity=_granularity_menu,
         trace=_trace_menu)
from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

# Eligible actions of the selected editor decide which navigation link is shown.
_eligible = calculator.elegible_actions
editor_actions = _eligible[_eligible['editor'] == editor_inputname]

if len(editor_actions) == 0:
    # Nothing to analyse for this editor: point back to the previous workbook.
    _link = f'<a href="{get_previous_notebook()}" target="_blank">Go back to the previous workbook</a>'
else:
    _link = f'<a href="{get_next_notebook()}" target="_blank">Go to next workbook</a>'
display(HTML(_link))