In [ ]:
# Enable the autoreload extension in mode 2, so imported modules are reloaded
# before each cell execution and edits to project code are picked up.
%load_ext autoreload
%autoreload 2

# Restore variables that were previously persisted with `%store`.
# The check that follows in this cell handles names that could not be restored.
%store -r the_page
%store -r the_editor
%store -r editor_inputname
%store -r calculator
%store -r editors_conflicts

# Names this notebook needs; normally restored by the `%store -r` lines above.
_required_names = ('the_page', 'the_editor', 'editor_inputname',
                   'calculator', 'editors_conflicts')

# Fall back to the pickled sample data when at least one of them is missing.
# `globals()` is used because the test runs inside a generator expression; at
# the top level of a cell it is the same namespace the original `locals()` saw.
if any(_name not in globals() for _name in _required_names):

    import pickle

    def _load_default(name):
        """Unpickle ``data/<name>.p``, closing the file handle afterwards."""
        # SECURITY: unpickling can execute arbitrary code. Only keep files in
        # `data/` that come from a trusted source.
        with open(f"data/{name}.p", 'rb') as pickle_file:
            return pickle.load(pickle_file)

    print("Loading default data...")
    the_page = _load_default('the_page')
    the_editor = _load_default('the_editor')
    editor_inputname = _load_default('editor_inputname')
    calculator = _load_default('calculator')
    editors_conflicts = _load_default('editors_conflicts')

# NOTE(review): this expression is not the last statement of the cell and its
# result is not assigned, so the resulting frame is neither shown nor kept —
# confirm whether `display(...)` was intended here.
the_editor.to_frame('value')

from IPython.display import display, Markdown as md
# Section A header, page title, guidance text and column glossary, rendered in
# that order as separate markdown outputs.
page_title = the_page['title']

section_a_guidance = (
    "The table below presents the conflict score and other related  metrics per editor "
    "(*editor_id* and *editor* column). "
    f"Select one editor of the page \"{page_title}\" to analyze "
    "the overall activity of the editor in the entire Wikipedia:"
)

conflict_columns_glossary = """
- **conflict_n**: the total number of conflicts
- **conflict**: the sum of conflict scores of all actions (without division)
- **actions**: the total number of actions performed by the editor
- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions
- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions
"""

for section_a_text in (
    "---",
    "# A.  Select an editor to analyze their conflicting editors",
    f"***Page: {page_title}***",
    section_a_guidance,
    conflict_columns_glossary,
):
    display(md(section_a_text))
In [ ]:
from visualization.conflicts_listener import ConflictsListener
from external.wikipedia import WikipediaDV, WikipediaAPI

# Markdown legend explaining the controls and metrics of the conflict-score
# graph; it is displayed later in this cell, right after the output widget.
graph_description = """
In the above graph you can select the *date range* and *granularity* (yearly, montly) 
of the timeline (X-axis), and plot any of the following counts in the black and red lines:
   
- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions
- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)
- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions
- **Number of Conflicts**: the total number of conflicts
- **Total Elegible Actions**: the total number of elegible actions
- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions
- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions
- **Time per Conflict Action**: average time of conflict actions
- **Time per Elegible Action**: average time of elegible actions
"""

def display_conflict_score(eleg_actions):
    """Show the page conflict score and an interactive conflict timeline.

    Builds a ``ConflictsListener`` for the given actions, publishes it as the
    module-level ``listener`` and connects its ``listen`` method to date-range,
    granularity and metric widgets through ``interact``.

    Args:
        eleg_actions: table of actions whose ``'rev_time'`` column feeds the
            date slider (presumably a pandas DataFrame of eligible actions —
            confirm against the caller).
    """
    global listener

    from visualization.conflicts_listener import ConflictsListener
    listener = ConflictsListener(eleg_actions)

    # Series names offered in the "black" and "red" dropdowns.
    selectable_metrics = [
        'Conflict Score',
        'Absolute Conflict Score',
        'Conflict Ratio',
        'Number of Conflicts',
        'Total Elegible Actions',
        'Total Conflict Time',
        'Total Elegible Time',
        'Time per Conflict Action',
        'Time per Elegible Action',
    ]

    page_conflict_score = calculator.get_page_conflict_score()
    display(md(f'*Total Page conflict score: {page_conflict_score}*'))

    # Widgets are created in the same order in which they are handed to
    # `interact`: date range, granularity, black series, red series.
    from utils.notebooks import get_date_slider_from_datetime
    from ipywidgets import interact
    from ipywidgets.widgets import Dropdown

    date_range_slider = get_date_slider_from_datetime(eleg_actions['rev_time'])
    granularity_dropdown = Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily')
    black_dropdown = Dropdown(options=selectable_metrics, value='Conflict Score')
    red_dropdown = Dropdown(options=['None'] + selectable_metrics, value='None')

    interact(
        listener.listen,
        _range=date_range_slider,
        granularity=granularity_dropdown,
        black=black_dropdown,
        red=red_dropdown,
    )

def select_editor(editor):
    global editor_df
    global the_editor
    global editor_inputname

    editor_inputname=editor
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
    try:
        the_editor = wikipedia_dv.get_editor(int(editor_inputname))
    except:
        the_editor = wikipedia_dv.get_editor(editor_inputname[2:])

    with out:
        %store the_editor
        %store editor_inputname

        clear_output()
        display(md("### Current Selection:"))
        
        if 'invalid' in the_editor:
            display(f"The editor {editor_inputname} was not found, try a different editor")
        else:
            # display the data that will be passed to the next notebook
            display(the_editor.to_frame('values'))
            display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))

            editor_df = calculator.elegible_actions[
                calculator.elegible_actions['editor'] == editor_inputname].copy()


            display_conflict_score(editor_df)


def on_selection_change(change):
    """Observer callback: analyze the editor of the first selected grid row.

    Passes the index label (``.name``) of the first row returned by
    ``qg_obj.get_selected_df()`` to ``select_editor``. Ordinary errors are
    reported with a printed hint instead of a traceback.

    Args:
        change: change notification supplied by the observer mechanism; unused.
    """
    try:
        select_editor(qg_obj.get_selected_df().iloc[0].name)
    except Exception:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # (KeyboardInterrupt) or SystemExit is not silently swallowed.
        print('Problem parsing the name. Execute the cell again and try a different editor.')

import qgrid
# Grid of editors to choose from; the visible-rows option is set to 5.
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
# Call `on_selection_change` whenever the grid's `_selected_rows` trait changes.
qg_obj.observe(on_selection_change, names=['_selected_rows'])
                       
from ipywidgets import Output
from IPython.display import display, clear_output, Markdown as md
display(md("### Select one editor (row) to continue the demo:"))
display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))
display(qg_obj)
# Output area that `select_editor` clears and redraws; it must exist before the
# first call to `select_editor` below.
out = Output()
display(out)
display(md(graph_description))
# Render the analysis for the editor name that is already set when this cell runs.
select_editor(editor_inputname)
In [ ]:
from IPython.display import display, Markdown as md
# Section B header, editor name and table description.
display(md("---"))
display(md("# B. Actions per page"))
display(md(f"***Editor: {the_editor['name']}***"))
display(md("""The following table shows the total number of actions (insertions + deletions) per month 
(`year_month` column), and page (`page_id` columns)."""))
# Legend with exactly one bullet per total column.
display(md("""**Columns description:**
- **total**: total number of actions (insertions, and deletions)
- **total_surv_48h**: total number of actions that survived at least 48 hours
- **total_persistent**:  total number of actions that survived until, at least, the end of the month
- **total_stopword_count**:  total number of actions that were performed in stop words"""))
In [ ]:
from wikiwho_wrapper import WikiWho
import pandas as pd

# Fetch the edit-persistence records of the selected editor.
wikiwho = WikiWho(lng='en')
agg_actions = wikiwho.dv.edit_persistence(editor_id=the_editor.userid)

# Parse the month column so it can drive date-based widgets.
agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])

# Names of the derived columns, in the same order as the sliced source columns.
total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']

# Each total is the element-wise sum of the matching adds / dels / reins column;
# the three label slices are added positionally as raw arrays.
adds_values = agg_actions.loc[:, 'adds':'adds_stopword_count'].values
dels_values = agg_actions.loc[:, 'dels':'dels_stopword_count'].values
reins_values = agg_actions.loc[:, 'reins':'reins_stopword_count'].values

totals_frame = pd.DataFrame(
    adds_values + dels_values + reins_values,
    index=agg_actions.index,
    columns=total_columns,
)
agg_actions = agg_actions.join(totals_frame)

import qgrid
qgrid.set_grid_option('maxVisibleRows', 5)
# Last expression of the cell: the grid is shown as the cell output.
qgrid.show_grid(agg_actions[['year_month', 'page_id'] + total_columns])
In [ ]:
# Legend for the actions-per-month graph, kept separate from the display calls.
actions_graph_legend = """In the following graph you can select the *date range* and *granularity* (yearly, montly) 
of the timeline (X-axis), and plot any of the follow counts in the black, red, blue and green lines:
   
- **adds**: number of first-time insertions
- **adds_surv_48h**: number of insertions for the first time that survived at least 48 hours
- **adds_persistent**:  number of insertions for the first time that survived until, at least, the end of the month
- **adds_stopword_count**:  number of insertions that were stop words
- **dels**: number of deletions
- **dels_surv_48h**: number of deletions that were not resinserted in the next 48 hours
- **dels_persistent**: number of deletions that were not resinserted until, at least, the end of the month
- **dels_stopword_count**: number of deletions that were stop words
- **reins**: number of reinsertions
- **reins_surv_48h**: number of reinsertionsthat survived at least 48 hours
- **reins_persistent**: number of reinsertionsthat survived until the end of the month
- **reins_stopword_count**: number of reinsertionsthat were stop words
"""

# Section C header, editor name, then the legend.
display(md("## C. Visualization of editor actions per month"))
display(md(f"***Editor: {the_editor['name']}***"))
display(md(actions_graph_legend))

# Listener whose `listen` method is driven by the widgets below.
from visualization.actions_listener import ActionsListener
listener = ActionsListener(agg_actions)

# Selectable series: the columns from 'total' through 'total_stopword_count'
# first, followed by those from 'adds' through 'reins_stopword_count'.
total_series = agg_actions.loc[:, 'total':'total_stopword_count'].columns
detail_series = agg_actions.loc[:, 'adds':'reins_stopword_count'].columns
actions = total_series.append(detail_series).values.tolist()

# Widgets
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

interact(
    listener.listen,
    _range=get_date_slider_from_datetime(agg_actions['year_month']),
    editor=fixed('All'),
    granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),
    black=Dropdown(options=actions, value='total'),
    red=Dropdown(options=['None'] + actions, value='total_surv_48h'),
    green=Dropdown(options=['None'] + actions, value='None'),
    blue=Dropdown(options=['None'] + actions, value='None'),
)
In [ ]:
from IPython.display import HTML
from utils.notebooks import get_next_notebook

# Link to the next notebook; `target="_blank"` opens it in a new tab.
next_notebook_href = get_next_notebook()
display(HTML(f'<a href="{next_notebook_href}" target="_blank">Go to next workbook</a>'))