This notebook creates a dashboard for evaluating the openness of scientific literature.
It was submitted to the John Hunter Excellence in Plotting Contest 2020
Content under CC-BY-NC-SA 4.0 license
Code under GNU-GPL v3.0 license
© 2020 Serena Bonaretti
Create jupyter-flex parameters:
(i.e. the following cells are tagged as "parameters")
# jupyter-flex dashboard parameters (this cell is tagged as "parameters")
# title of the dashboard
flex_title = "Open Data, Open Software, and Open Access Publications in Knee Cartilage Segmentation Literature"
# link to the source code, added to the top bar of the dashboard
flex_source_code = "https://github.com/sbonaretti/Hunter_viz_2020/blob/master/open_literature_flex.ipynb"
# layout orientation of the dashboard
flex_orientation = "rows"
Imports:
Load the data:
# for Jupyter notebook and Binder (not JupyterLab)
#alt.renderers.enable('notebook')

import os  # cell-local import: only needed for the download guard below

# file name and zenodo url
file_name = "cart_segm_literature_viz.csv"
# the last digits of the record URL identify the specific version of the dataset DOI
zenodo_url = "https://zenodo.org/record/3872040/files/"
local_path = "./" + file_name

# download only when the file is not on disk yet: wget.download() does not
# overwrite an existing file but saves a numbered duplicate ("name (1).csv"),
# so re-running this cell would pile up copies while still reading the first one
if not os.path.exists(local_path):
    wget.download(zenodo_url + file_name, local_path)  # input, output

# load literature table
literature = pd.read_csv(local_path)

# replace underscore with space and opening bracket (literal match, not a regex)
literature["bibtex_id"] = literature["bibtex_id"].str.replace('_', ' (', regex=False)
# adding closing bracket
literature["bibtex_id"] = literature["bibtex_id"].astype(str) + ")"

# adding little randomness to latitude and longitude to avoid dot overlaps
np.random.seed(seed=3)  # if this is not present, the cell is not reproducible
n_rows = len(literature)

# add randomness to latitude (drawn first, so the seeded sequence is unchanged);
# the array is added by position, independently of the table index
random_lat = np.random.uniform(low=0.0, high=2.5, size=(n_rows,))
literature["latitude_random"] = literature["latitude"] + random_lat

# add randomness to longitude
random_lon = np.random.uniform(low=0.0, high=2.5, size=(n_rows,))
literature["longitude_random"] = literature["longitude"] + random_lon
The markdown cells below that contain titles are used by jupyter-flex to create the sections of the dashboard
Cells containing comments start with ->
-> Create the sidebar:
# declare the ipywidgets of the left sidebar
def _sidebar_checkbox(label):
    """Return an unticked, enabled, non-indented checkbox captioned *label*."""
    return widgets.Checkbox(
        value=False,
        description=label,
        disabled=False,
        indent=False,
    )


# one checkbox per openness criterion
open_original_data = _sidebar_checkbox('Open original data')
open_derived_data = _sidebar_checkbox('Open derived data')
open_software = _sidebar_checkbox('Open-source software')
open_access = _sidebar_checkbox('Open access paper')

# stack the four checkboxes vertically; as the last expression of the cell,
# the box is what the cell displays
VBox(children=[open_original_data, open_derived_data, open_software, open_access])
VBox(children=(Checkbox(value=False, description='Open original data', indent=False), Checkbox(value=False, de…
-> Create the literature map:
# declare the widget where the map is going to be displayed
output_map = widgets.Output()

# create the map using plotly: one dot per row of the literature table, placed
# at its jittered coordinates, with the bibtex id shown on hover.
# (The former `fig = go.FigureWidget()` was removed: it was overwritten by the
# figure returned here before ever being used.)
fig = px.scatter_geo(
    literature,
    lat="latitude_random",
    lon="longitude_random",
    projection="equirectangular",
    hover_name="bibtex_id",
)

# initial look of the dots
fig.data[0]['marker']['color'] = "black"
fig.data[0]['marker']['size'] = 8

# margins of 20 around the map on every side
margin = go.layout.Margin(l=20, r=20, b=20, t=20)
fig = fig.update_layout(margin=margin)
def _describe_selection(labels):
    """Join criterion labels into the phrase that ends the report sentence.

    Up to two labels are joined with " and "; three or more become a
    comma-separated list with ", and " before the last label.
    """
    if len(labels) <= 2:
        return " and ".join(labels)
    return ", ".join(labels[:-1]) + ", and " + labels[-1]


def on_value_change(change):
    """Refresh the map, the report sentence, and the table after a checkbox changes.

    Rows of ``literature`` whose link column differs from "not_available" for
    every ticked checkbox are coloured red on the map (all other dots are
    black) and shown in ``output_table``, and ``report`` states how many rows
    matched. When no checkbox is ticked, every dot is black, the table is left
    empty, and the report is blanked.

    Parameters
    ----------
    change
        Change notification handed over by ``observe``. It is not used: the
        current value of each checkbox is read directly.
    """
    # checkbox, link column, wording when ticked alone, wording in a combination.
    # The order of the rows is the order in which criteria appear in the sentence.
    criteria = [
        (open_original_data, "link_to_open_original_data",
         "open original data", "open original data"),
        (open_derived_data, "link_to_open_derived_data",
         "open derived data", "open derived data"),
        (open_software, "link_to_open_source_code",
         "open-source software", "open-source code"),
        (open_access, "link_to_open_access",
         "open access publication", "open access publication"),
    ]
    ticked = [entry for entry in criteria if entry[0].value]

    # wording of the selection for the report sentence (unused when nothing is ticked)
    if len(ticked) == 1:
        end_result_text = ticked[0][2]
    else:
        end_result_text = _describe_selection([entry[3] for entry in ticked])

    output_map.clear_output()

    # update the map
    with output_map:
        if ticked:
            # keep the rows that satisfy every ticked criterion
            matches = pd.Series(True, index=literature.index)
            for _, column, _, _ in ticked:
                matches &= literature[column] != "not_available"
            current_data = literature[matches]
        else:
            current_data = literature  # for output_table

        # all dots black, then red for the rows satisfying the selection
        # (only when at least one checkbox is ticked)
        color_series = pd.Series("black", index=literature.index)
        if ticked:
            color_series.loc[current_data.index] = "red"

        # update the colors in the figure
        fig.data[0]["marker"]['color'] = color_series
        display(fig)

    # update the table output
    output_table.clear_output()
    with output_table:
        if ticked:
            # print out the text
            n_matches = current_data.shape[0]
            verb = "has" if n_matches in (0, 1) else "have"
            report.value = f"Out of {literature.shape[0]} papers, {n_matches} {verb} {end_result_text}"

            # drop the columns not meant for the print out and give the rest readable headers
            table = current_data.drop(
                ["algorithm_type", "bibtex_id", "latitude", "longitude", "latitude_random", "longitude_random"],
                axis=1,
            )
            table = table.rename(columns={
                "author_1": "First Author",
                "country_last_author": "Country",
                "title": "Title",
                "year": "Year",
                "link_to_open_access": "Open Access Publication",
                "link_to_open_original_data": "Original Data",
                "link_to_open_derived_data": "Derived Data",
                "link_to_open_source_code": "Open Source Code",
            })

            # print out the table
            display(table)
        else:
            report.value = " "
# observe functions for the widgets: any checkbox change refreshes map and table
for checkbox in (open_original_data, open_derived_data, open_software, open_access):
    checkbox.observe(on_value_change, names="value")

# render the map once up front: the callback above only runs after a checkbox
# changes, so without this the map area stays empty until the first click
with output_map:
    display(fig)

# show output map
output_map
Output()
-> Write the outputs of the selections:
# create the widgets for this section of the dashboard
# sentence stating how many papers match the ticked criteria (starts blank)
report = widgets.Label(value=" ")
# area in which the table of matching papers is shown
output_table = widgets.Output()
# stack sentence and table vertically; as the last expression of the cell,
# the box is what the cell displays
VBox(children=[report, output_table])
VBox(children=(Label(value=' '), Output()))
-> Footer
-> Dependencies for reproducibility of this notebook
# load the watermark IPython extension, which provides the %watermark magic used below
%load_ext watermark
# python, ipython, packages, and machine characteristics
# (the packages to report are the comma-separated names after -p)
%watermark -v -m -p wget,pandas,numpy,plotly,jupyter_flex,voila,watermark
CPython 3.7.6 IPython 7.13.0 wget 3.2 pandas 1.0.3 numpy 1.18.1 plotly 4.8.0 jupyter_flex 0.5.0 voila 0.1.21 watermark 2.0.2 compiler : Clang 4.0.1 (tags/RELEASE_401/final) system : Darwin release : 19.4.0 machine : x86_64 processor : i386 CPU cores : 4 interpreter: 64bit