#!/usr/bin/env python
# coding: utf-8

# In[ ]:


get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('store', '-r the_page')

if 'the_page' not in locals():
    import pickle
    print("Loading default data...")
    the_page = pickle.load(open("data/the_page.p", 'rb'))

from IPython.display import display, Markdown as md

display(md("---"))
display(md("# A. Insertions, Deletions, Reinsertions (Actions)"))
display(md("Provided by the [WikiWho API](https://www.wikiwho.net/en/api/v1.0.0-beta/)"))
display(md("Please give the background processes time to load (see the cog wheel symbol to the right of 'edit app') before interacting with the controls too often!"))
display(md(f"***Page: {the_page['title']}***"))


# In[ ]:


# TODO: add a description of the action types directly after the header; see the TokTrack paper for explanations.
# TODO: show editor names instead of "editor id"; for IPs just use "Unregistered editors".
# TODO: fails for larger articles like 'Evolution' on GESIS notebooks; the kernel dies when ranges are selected. Solution?


# In[ ]:


from wikiwho_wrapper import WikiWho
import pandas as pd
import qgrid

# limit the grid to 5 visible rows so the larger DataFrames we render don't take up too much space
qgrid.set_grid_option('maxVisibleRows', 5)

wikiwho = WikiWho(lng='en')
agg_actions = wikiwho.dv.actions(the_page.page_id)

# define total columns
total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']

# add columns with the total actions
agg_actions = agg_actions.join(pd.DataFrame(
    agg_actions.loc[:, 'adds':'adds_stopword_count'].values +
    agg_actions.loc[:, 'dels':'dels_stopword_count'].values +
    agg_actions.loc[:, 'reins':'reins_stopword_count'].values,
    index=agg_actions.index,
    columns=total_columns
))

qgrid.show_grid(agg_actions[['year_month', 'editor_id'] + total_columns])


# In[ ]:


# Convert to datetime
agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])

# Group the data by year month and page (drop the editor information)
agg_actions.drop('editor_id', axis=1).groupby(['year_month', 'page_id']).sum()

# Listener
from visualization.actions_listener import ActionsListener

listener = ActionsListener(agg_actions)
action_types = (agg_actions.loc[:, 'total':'total_stopword_count'].columns.append(
    agg_actions.loc[:, 'adds':'reins_stopword_count'].columns)).values.tolist()

# Visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

interact(listener.listen,
         _range=get_date_slider_from_datetime(agg_actions['year_month']),
         editor=fixed('All'),
         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Yearly'),
         black=Dropdown(options=action_types, value='total'),
         red=Dropdown(options=['None'] + action_types, value='total_surv_48h'),
         green=Dropdown(options=['None'] + action_types, value='None'),
         blue=Dropdown(options=['None'] + action_types, value='None'))
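

# In[ ]:


# Hedged illustrative sketch (not part of the original notebook): cross-check the chart above
# by aggregating the monthly totals to yearly sums with plain pandas. It only uses the
# `agg_actions` DataFrame and the `total_columns` list defined in the cells above.
yearly_totals = agg_actions.groupby(agg_actions['year_month'].dt.year)[total_columns].sum()
yearly_totals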
Measuring conflict")) display(md(f'## B.1 Token Conflict Score')) display(md(f"***Page: {the_page['title']}***")) # In[ ]: # create the api from wikiwho_wrapper import WikiWho wikiwho = WikiWho(lng='en') from IPython.display import display, Markdown as md # Get the content and revisions from the wikiwho api display(md("Downloading all_content from the WikiWhoApi...")) all_content = wikiwho.dv.all_content(the_page['page_id']) display(md("Downloading revisions from the WikiWhoApi...")) revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id']) from IPython.display import clear_output clear_output() # In[ ]: from metrics.conflict import ConflictManager from wikiwho_wrapper import WikiWho from IPython.display import clear_output from IPython.display import HTML from utils.notebooks import get_next_notebook, get_previous_notebook # call the calculator calculator = ConflictManager(all_content, revisions) calculator.calculate() clear_output() # display the tokens, the difference in seconds and its corresponding conflict score conflicts = calculator.conflicts.copy() conflicts['time_diff_secs'] = conflicts['time_diff'].dt.total_seconds() if len(conflicts) > 0: display(qgrid.show_grid(conflicts[[ 'action', 'token', 'token_id', 'rev_id', 'editor', 'time_diff_secs', 'conflict']].sort_values('conflict', ascending=False))) else: display(md(f'**There are no conflicting tokes in this page.**')) display(HTML(f'Go back to the previous workbook')) # In[ ]: # explain the columns of the table # In[ ]: from IPython.display import display, Markdown as md display(md("---")) display(md(f'## B.2 Conflicting tokens per page')) display(md(f"***Page: {the_page['title']}***")) # In[ ]: #explain: Eligible Actions, all actions, only conflicts, before the selection boxes and how the score is calculated (at least the basics) # explain what the colors in the tag cloud mean (why are insertions red and deletions blue, not the other way around?) 


# In[ ]:


# listener
from visualization.wordcloud_listener import WCListener

listener = WCListener(sources={
    'All actions': calculator.all_actions,
    'Eligible Actions': calculator.elegible_actions,
    'Only Conflicts': calculator.conflicts
})

# visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact, fixed
from ipywidgets.widgets import Dropdown

interact(listener.listen,
         _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),
         source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),
         action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),
         editor=fixed('All'))


# In[ ]:


from IPython.display import display, Markdown as md

display(md("---"))
display(md('## B.3 Conflict Score and related metrics'))
display(md(f"***Page: {the_page['title']}***"))


# In[ ]:


# Visualization
from visualization.conflicts_listener import ConflictsListener

elegible_actions = calculator.elegible_actions.copy()
listener = ConflictsListener(elegible_actions)

metrics = ['Conflict Score', 'Conflict Ratio', 'Total Conflicts',
           'Total Elegible Actions', 'Total Actions', 'Total Time',
           'Time per Elegible Action']

display(md(f'**Page conflict score: {calculator.get_page_conflict_score()}**'))

# Visualization
from utils.notebooks import get_date_slider_from_datetime
from ipywidgets import interact
from ipywidgets.widgets import Dropdown

if calculator.get_page_conflict_score() != 0:
    interact(listener.listen,
             _range=get_date_slider_from_datetime(elegible_actions['rev_time']),
             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),
             black=Dropdown(options=metrics, value='Conflict Score'),
             red=Dropdown(options=['None'] + metrics, value='None'))


# In[ ]:


# TODO: explain the difference between conflict ratio, conflict score, total conflicts and time per eligible action.


# In[ ]:


from IPython.display import display, Markdown as md

display(md("---"))
display(md('## B.4 Conflict Score per Editor'))
display(md(f"***Page: {the_page['title']}***"))


# In[ ]:


editors_conflicts = calculator.get_conflict_score_per_editor()
qg_obj = qgrid.show_grid(editors_conflicts)

if len(editors_conflicts) > 0:
    display(qg_obj)
else:
    display(md('**There is no conflict score for this page.**'))


# In[ ]:


from IPython.display import HTML
from utils.notebooks import get_next_notebook, get_previous_notebook

get_ipython().run_line_magic('store', 'agg_actions')
get_ipython().run_line_magic('store', 'calculator')

clear_output()

if len(editors_conflicts) > 0:
    display(HTML('Go to next workbook'))
else:
    display(HTML('Go back to the previous workbook'))
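

# In[ ]:


# Hedged addition (not part of the original notebook): calling `%store` with no arguments lists
# everything persisted by IPython's storemagic, which is a quick way to confirm that
# `agg_actions` and `calculator` were stored for the next workbook.
get_ipython().run_line_magic('store', '')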