Notebook

In [ ]:

%load_ext autoreload
%autoreload 2

%store -r the_page
%store -r agg_actions
%store -r calculator

if ('the_page' not in locals() or 
    'agg_actions' not in locals() or 
    'calculator' not in locals()):
    import pickle
    print("Loading default data...")
    the_page = pickle.load(open("data/the_page.p",'rb'))
    agg_actions = pickle.load(open("data/agg_actions.p",'rb'))
    calculator = pickle.load(open("data/calculator.p",'rb'))
    
from IPython.display import display, Markdown as md
display(md("---"))
display(md(f"# A.  Select a conflicting editor"))
display(md(f"***Page: {the_page['title']}***"))

In [ ]:

# The explanation of the conflict-related terms is missing here, just as in notebook 2.
# User 'MelbourneStar' in the 'Barack_Obama' article shows weird behavior in the first two line graphs: the total actions (per month) in the first graph are higher than in the second one (see December 2014). Also, in the first graph, why are total and conflict_score shown when they are 0? And how can there be a tag cloud with conflicted words if the conflict score is 0 all the time?

In [ ]:

from external.wikipedia import WikipediaDV, WikipediaAPI

# Per-editor conflict scores and the elegible actions, both taken from the
# calculator restored in the first cell.
editors_conflicts = calculator.get_conflict_score_per_editor()
elegible_actions = calculator.elegible_actions

# Ask Wikipedia for the editors whose index entry is purely numeric.
wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
numeric_editor_ids = [
    int(editor_id)
    for editor_id in editors_conflicts.index
    if editor_id.isdigit()
]
editors = wikipedia_dv.get_editors(numeric_editor_ids)

# The conflict scores are indexed by string ids, so the join key must be a
# string as well.
editors['userid'] = editors['userid'].astype('str')

# Attach the conflict scores to the editor details and index by user id.
editors_conflicts = (
    editors[['userid', 'name', 'registration']]
    .merge(editors_conflicts, right_index=True, left_on='userid', how='left')
    .set_index('userid')
)

In [ ]:

from visualization.conflicts_listener import ConflictsListener

def display_conflict_score(eleg_actions):
    """Show the page conflict score and an interactive plot of conflict metrics.

    Args:
        eleg_actions: the actions handed to ``ConflictsListener``; its
            ``rev_time`` column is used to build the date range slider.
    """
    from visualization.conflicts_listener import ConflictsListener
    from utils.notebooks import get_date_slider_from_datetime
    from ipywidgets import interact
    from ipywidgets.widgets import Dropdown

    conflicts_listener = ConflictsListener(eleg_actions)

    # Metrics that can be plotted as the black or the red trace.
    metric_names = [
        'Conflict Score',
        'Conflict Ratio',
        'Total Conflicts',
        'Total Elegible Actions',
        'Total Actions',
        'Total Time',
        'Time per Elegible Action',
    ]

    page_score = calculator.get_page_conflict_score()
    display(md(f'**Page conflict score: {page_score}**'))

    # Build the controls, then hand them to the listener callback.
    date_slider = get_date_slider_from_datetime(eleg_actions['rev_time'])
    granularity_choice = Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily')
    black_choice = Dropdown(options=metric_names, value='Conflict Score')
    red_choice = Dropdown(options=['None'] + metric_names, value='None')

    interact(conflicts_listener.listen,
             _range=date_slider,
             granularity=granularity_choice,
             black=black_choice,
             red=red_choice)

def select_editor(editor):
    global the_editor
    global editor_inputname
    editor_inputname=editor
    
    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))
    try:
        the_editor = wikipedia_dv.get_editor(int(editor_inputname))
    except:
        the_editor = wikipedia_dv.get_editor(editor_inputname[2:])

    with out:
        %store the_editor
        %store editor_inputname

        clear_output()
        display(md("### Current Selection:"))
        
        if 'invalid' in the_editor:
            display(f"The editor {editor_inputname} was not found, try a different editor")
        else:
            # display the data that will be passed to the next notebook
            display(the_editor.to_frame('values'))
            display(md(f"#### Evolution of the Conflict Score of *{the_editor['name']}*"))
 
            display_conflict_score(elegible_actions[elegible_actions['editor'] == editor_inputname].copy())


def on_selection_change(change):
    """Observer callback: select the editor of the first highlighted grid row.

    Args:
        change: the change notification passed by the widget; not used.
    """
    try:
        selected_rows = qg_obj.get_selected_df()
        if len(selected_rows) == 0:
            # Nothing is highlighted (e.g. the selection was cleared), so
            # there is no editor to parse; keep the current selection.
            return
        select_editor(selected_rows.iloc[0].name)
    except Exception as error:
        # Keep the widget alive, but surface the cause instead of hiding it.
        print('Problem parsing the name. Execute the cell again and try a different editor.')
        print(f'Details: {error!r}')

import qgrid
from ipywidgets import Output
from IPython.display import display, clear_output, Markdown as md

# Interactive grid of editors; changing the row selection triggers
# on_selection_change.
qgrid.set_grid_option('maxVisibleRows', 5)
qg_obj = qgrid.show_grid(editors_conflicts)
qg_obj.observe(on_selection_change, names=['_selected_rows'])

display(md("### Select one editor row for the next notebook:"))
display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))
display(qg_obj)

# Output area that select_editor redraws on every selection.
out = Output()
display(out)

# Pre-select the first editor whose id does not start with '0|'.
default_editor = next(
    (editor_id for editor_id in editors_conflicts.index
     if not editor_id.startswith('0|')),
    None,
)
if default_editor is not None:
    select_editor(default_editor)

                      

In [ ]:

from ipywidgets import  widgets
from IPython.display import display, Javascript

def run_below(ev):
    """Button click handler: ask the front-end to execute all cells below.

    Args:
        ev: the clicked button, passed by ``on_click``; not used.
    """
    # NOTE(review): relies on the `IPython.notebook` JavaScript object of the
    # classic Notebook front-end; presumably unavailable in JupyterLab -- confirm.
    display(Javascript('IPython.notebook.execute_cells_below()'))

# `min_width` is not a Button trait, so passing it as a keyword has no effect;
# the size has to be set through the widget's layout.
button = widgets.Button(
    description="Refresh Notebook",
    button_style='info',
    layout=widgets.Layout(min_width='500px'),
)
button.on_click(run_below)
display(button)

In [ ]:

from IPython.display import display, Markdown as md

# Section header for part B, naming the current page and editor.
for header_line in (
    "---",
    "# B. History of editor on a page",
    f"***Page: {the_page['title']}***",
    f"***Editor: {the_editor['name']}***",
):
    display(md(header_line))

In [ ]:

from visualization.actions_listener import ActionsListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

# Keep only the aggregated actions of the selected editor.
is_selected_editor = agg_actions['editor_id'] == the_editor.userid
editor_agg_actions = agg_actions[is_selected_editor]

# Listener
listener = ActionsListener(editor_agg_actions)

# Plottable columns: the 'total' range followed by the 'adds' range.
total_columns = editor_agg_actions.loc[:, 'total':'total_stopword_count'].columns
detail_columns = editor_agg_actions.loc[:, 'adds':'reins_stopword_count'].columns
actions = total_columns.append(detail_columns).values.tolist()

# Visualization (the date slider spans all of agg_actions, not only the
# selected editor's rows).
interact(
    listener.listen,
    _range=get_date_slider_from_datetime(agg_actions['year_month']),
    editor=fixed('All'),
    granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),
    black=Dropdown(options=actions, value='total'),
    red=Dropdown(options=['None'] + actions, value='total_surv_48h'),
    green=Dropdown(options=['None'] + actions, value='None'),
    blue=Dropdown(options=['None'] + actions, value='None'),
)
         

In [ ]:

from IPython.display import display, Markdown as md

# Section header for part C, naming the current page and editor.
for header_line in (
    "---",
    "# C. Tokens that enter into conflict with other editors",
    f"***Page: {the_page['title']}***",
    f"***Editor: {the_editor['name']}***",
):
    display(md(header_line))

In [ ]:

from visualization.wordcloud_listener import WCListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

# One data source per dropdown entry, each restricted to the selected editor.
sources = {
    label: frame[frame['editor'] == editor_inputname]
    for label, frame in (
        ('All actions', calculator.all_actions),
        ('Elegible Actions', calculator.elegible_actions),
        ('Only Conflicts', calculator.conflicts),
    )
}

# listener
listener = WCListener(sources)

# visualization
interact(
    listener.listen,
    _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),
    source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),
    action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),
    editor=fixed('All'),
)

In [ ]:

from IPython.display import display, Markdown as md

# Section header for part D, naming the current page and editor.
for header_line in (
    "---",
    "# D. Tokens on the page owned by the editor",
    f"***Page: {the_page['title']}***",
    f"***Editor: {the_editor['name']}***",
):
    display(md(header_line))

In [ ]:

from visualization.owned_listener import OwnedListener
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

# The listener receives every action on the page plus the selected editor.
owned = calculator.all_actions
listener = OwnedListener(owned, editor_inputname)

# Traces offered in the dropdown.
traces = ['Tokens Owned', 'Tokens Owned (%)']

# Visualization
interact(
    listener.listen,
    _range=get_date_slider_from_datetime(owned['rev_time']),
    granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),
    trace=Dropdown(options=traces, value='Tokens Owned (%)'),
)

In [ ]:

from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

# Elegible actions of the selected editor decide where the link points.
is_selected_editor = calculator.elegible_actions['editor'] == editor_inputname
editor_actions = calculator.elegible_actions[is_selected_editor]

# Link forward when the editor has elegible actions, otherwise link back.
if len(editor_actions) > 0:
    navigation_link = f'<a href="{get_next_notebook()}" target="_blank">Go to next workbook</a>'
else:
    navigation_link = f'<a href="{get_previous_notebook()}" target="_blank">Go back to the previous workbook</a>'

display(HTML(navigation_link))