#!/usr/bin/env python
# coding: utf-8

# In[ ]:

# Notebook bootstrap: enable autoreload of local modules and recover the
# page selected in a previous notebook via the %store magic.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('store', '-r the_page')

if 'the_page' not in locals():
    import pickle
    print("Loading default data...")
    # NOTE(review): pickle.load is only safe on trusted local fixtures —
    # never point this at untrusted data.  A context manager guarantees
    # the file handle is closed (the original left it open).
    with open("data/the_page.p", 'rb') as _fh:
        the_page = pickle.load(_fh)

from IPython.display import display, Markdown as md

# Section header for part A.
display(md("---"))
display(md(f"# A. Insertions, Deletions, Reinsertions (Actions)"))
display(md(f" Provided by the [WikiWho API](https://www.wikiwho.net/en/api/v1.0.0-beta/)"))
display(md(f"Please give the background processes time to load (see cog wheel symbol right of 'edit app') before interacting with the controls too often!"))
display(md(f"***Page: {the_page['title']}***"))
# In[ ]:
# TODO: add a description of the action types directly after the header; see the TokTrack paper for explanations.
# TODO: show editor names instead of "editor id"; for IP editors, display "Unregistered editors".
# TODO: fails for larger articles (e.g. 'Evolution') on GESIS notebooks — the kernel dies when ranges are selected. Find a solution.
# In[ ]:
from wikiwho_wrapper import WikiWho
import pandas as pd
import qgrid

# Cap the rows rendered per grid so the larger DataFrames we render
# don't take up too much vertical space.
qgrid.set_grid_option('maxVisibleRows', 5)

wikiwho = WikiWho(lng='en')
# `the_page` is accessed item-style everywhere else in this notebook
# (the_page['title'], the_page['page_id']); attribute access (.page_id)
# would fail on a plain dict, so use item access here as well.
agg_actions = wikiwho.dv.actions(the_page['page_id'])

# Columns that will hold the per-row totals (adds + dels + reins).
total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']

# Sum the three action families column-wise and join them on as totals.
# The three .loc slices are assumed to be positionally aligned — TODO confirm
# against the WikiWho actions schema.
agg_actions = agg_actions.join(pd.DataFrame(
    agg_actions.loc[:, 'adds':'adds_stopword_count'].values +
    agg_actions.loc[:, 'dels':'dels_stopword_count'].values +
    agg_actions.loc[:, 'reins':'reins_stopword_count'].values,
    index=agg_actions.index,
    columns=total_columns
))

qgrid.show_grid(agg_actions[['year_month', 'editor_id'] + total_columns])
# In[ ]:
# Convert to datetime so the date slider below gets real timestamps.
agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])

# NOTE(review): the original cell computed
#   agg_actions.drop('editor_id', axis=1).groupby(['year_month', 'page_id']).sum()
# mid-cell without assigning or displaying the result, so it was a no-op;
# removed here.

# Listener that redraws the actions plot when a widget changes.
from visualization.actions_listener import ActionsListener
listener = ActionsListener(agg_actions)

# Selectable metrics: the computed total_* columns plus the raw
# adds/dels/reins columns.
action_types = (agg_actions.loc[:, 'total':'total_stopword_count'].columns.append(
    agg_actions.loc[:, 'adds':'reins_stopword_count'].columns)).values.tolist()

# Visualization: a date-range slider plus one dropdown per plotted line.
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

interact(listener.listen,
         _range=get_date_slider_from_datetime(agg_actions['year_month']),
         editor=fixed('All'),
         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Yearly'),
         black=Dropdown(options=action_types, value='total'),
         red=Dropdown(options=['None'] + action_types, value='total_surv_48h'),
         green=Dropdown(options=['None'] + action_types, value='None'),
         blue=Dropdown(options=['None'] + action_types, value='None'))
# In[ ]:
from IPython.display import display, Markdown as md

# Section header for part B / B.1.
for _text in ("---",
              f"# B. Measuring conflict",
              f'## B.1 Token Conflict Score',
              f"***Page: {the_page['title']}***"):
    display(md(_text))
# In[ ]:
# Create the API wrapper and download the page content and revision list.
from wikiwho_wrapper import WikiWho
from IPython.display import clear_output, display, Markdown as md

wikiwho = WikiWho(lng='en')

display(md("Downloading all_content from the WikiWhoApi..."))
all_content = wikiwho.dv.all_content(the_page['page_id'])

display(md("Downloading revisions from the WikiWhoApi..."))
revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id'])

# Remove the progress messages once both downloads have finished.
clear_output()
# In[ ]:
from metrics.conflict import ConflictManager
from wikiwho_wrapper import WikiWho
from IPython.display import clear_output
from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

# Compute per-token conflicts from the downloaded content and revisions.
calculator = ConflictManager(all_content, revisions)
calculator.calculate()
clear_output()

# Work on a copy so the calculator's own DataFrame stays untouched.
conflicts = calculator.conflicts.copy()
# Express the time difference in seconds so it is readable in the grid.
conflicts['time_diff_secs'] = conflicts['time_diff'].dt.total_seconds()

if len(conflicts) > 0:
    # Show the tokens sorted by conflict score, highest first.
    display(qgrid.show_grid(conflicts[[
        'action', 'token', 'token_id', 'rev_id',
        'editor', 'time_diff_secs', 'conflict']].sort_values('conflict', ascending=False)))
else:
    # Fixed typo in the user-facing message: "tokes" -> "tokens".
    display(md(f'**There are no conflicting tokens in this page.**'))
    display(HTML(f'Go back to the previous workbook'))
# In[ ]:
# explain the columns of the table
# In[ ]:
from IPython.display import display, Markdown as md

# Section header for part B.2.
for _text in ("---",
              f'## B.2 Conflicting tokens per page',
              f"***Page: {the_page['title']}***"):
    display(md(_text))
# In[ ]:
# TODO: before the selection boxes, explain the source options (All actions, Eligible Actions, Only Conflicts) and how the score is calculated (at least the basics).
# TODO: explain what the colors in the tag cloud mean (why are insertions red and deletions blue, and not the other way around?).
# In[ ]:
# Listener that regenerates the word cloud from the selected action source.
from visualization.wordcloud_listener import WCListener

# NOTE: 'elegible_actions' is the attribute name exposed by ConflictManager
# (spelling follows upstream).
_sources = {
    'All actions': calculator.all_actions,
    'Eligible Actions': calculator.elegible_actions,
    'Only Conflicts': calculator.conflicts,
}
listener = WCListener(sources=_sources)

# Visualization widgets: date range, source selector and action filter.
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

interact(listener.listen,
         _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),
         source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),
         action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),
         editor=fixed('All'))
# In[ ]:
from IPython.display import display, Markdown as md

# Section header for part B.3.
for _text in ("---",
              f'## B.3 Conflict Score and related metrics',
              f"***Page: {the_page['title']}***"):
    display(md(_text))
# In[ ]:
# Visualization of page-level conflict metrics over time.
from visualization.conflicts_listener import ConflictsListener

# Copy so the plotting code cannot mutate the calculator's DataFrame.
actions = calculator.elegible_actions.copy()
listener = ConflictsListener(actions)

# Metric names as exposed by ConflictsListener (spelling follows upstream).
metrics = ['Conflict Score', 'Conflict Ratio', 'Total Conflicts',
           'Total Elegible Actions', 'Total Actions', 'Total Time',
           'Time per Elegible Action']

display(md(f'**Page conflict score: {calculator.get_page_conflict_score()}**'))

# Only offer the interactive plot when there is a non-zero score to show.
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

if calculator.get_page_conflict_score() != 0:
    interact(listener.listen,
             _range=get_date_slider_from_datetime(actions['rev_time']),
             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),
             black=Dropdown(options=metrics, value='Conflict Score'),
             red=Dropdown(options=['None'] + metrics, value='None'))
# In[ ]:
# TODO: explain the difference between conflict ratio, conflict score, total conflicts, and time per eligible action.
# In[ ]:
from IPython.display import display, Markdown as md

# Section header for part B.4.
for _text in ("---",
              f'## B.4 Conflict Score per Editor',
              f"***Page: {the_page['title']}***"):
    display(md(_text))
# In[ ]:
# Per-editor conflict scores, rendered as an interactive grid.
editors_conflicts = calculator.get_conflict_score_per_editor()

if len(editors_conflicts) > 0:
    # Build the grid only when there is something to show (the original
    # constructed it unconditionally, wasting work on the empty branch).
    qg_obj = qgrid.show_grid(editors_conflicts)
    display(qg_obj)
else:
    display(md(f'**There is no Conflict Score**'))
# In[ ]:
from IPython.display import HTML, clear_output
from utils.notebooks import get_next_notebook, get_previous_notebook

# Persist the results so the next notebook in the sequence can reuse them
# via %store -r.
get_ipython().run_line_magic('store', 'agg_actions')
get_ipython().run_line_magic('store', 'calculator')
clear_output()

# Link forward when there are per-editor conflicts to explore, otherwise back.
# (Dropped the pointless f-prefixes: the strings contain no placeholders.)
if len(editors_conflicts) > 0:
    display(HTML('Go to next workbook'))
else:
    display(HTML('Go back to the previous workbook'))