By running the code cell below, you can browse the CSV data files that are available from the CERN Open Data portal, the CMS document database and the cms-opendata-education GitHub organisation. These files are listed in a single file called 'csvDatabase.csv'. If you notice that a file is missing, please add it to the database.
Currently, this notebook lets you filter the files by:
# -----------------------------------------------------------------------------
''' This program can be used to filter a csv-database full of csv-files that
contain data for CMS OpenData in education material. This program is helpful
if you want to search for a specific kind of datafile, for example a file that
contains data of more than 100 000 collision events.
To use this program, just run the code and start searching for files.
NOTE! This program only works on platforms that support ipywidgets. At the
time this was written, the widgets did not work on JupyterLab by default.
This program should work fine at least in Jupyter Notebook and MyBinder.
This code does not validate user input, so the input must be well-formed;
otherwise the program may crash.
If you have any questions, please contact the author.
Author: Juha Teuho, juha.teuho@gmail.com
'''
# -----------------------------------------------------------------------------
# Import modules and set display options.
# -----------------------------------------------------------------------------
import ipywidgets as widgets
import pandas as pd
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
pd.options.display.max_colwidth = 80
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
''' Read the csv file and save a few data columns to variables.
    The file must contain the columns 'Name', 'n-events', 'ParentDataset',
    'Source' and 'Parameters'.
'''
file = pd.read_csv("csvDatabase.csv")
try:
    filenames = file['Name']
    events = file['n-events']
    parent = file['ParentDataset']
    sources = file['Source']
    parameters = file['Parameters']
except KeyError as missing_column:
    # A missing column is the only failure expected here. Anything else
    # (KeyboardInterrupt included) must propagate instead of being hidden
    # behind a generic message.
    print("Invalid file! Missing column:", missing_column)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# Module-level state and widgets shared by the functions below.
# 'buttons' collects every result button that gets created; 'indeces' holds,
# at the same position, the database row that button stands for.
buttons = list()
indeces = list()

# Text box used by the name search.
t = widgets.Text()

# Input boxes and trigger button used by the event search.
min_event = widgets.Text(description='Min:', value="0")
max_event = widgets.Text(description='Max:', value="999999")
search_button = widgets.Button(button_style='success', description="Search")
event_widgets = [min_event, max_event, search_button]
def make_cb(description):
    ''' Returns a new checkbox widget that is unchecked, enabled and
        labelled with the given description.
    '''
    checkbox_options = dict(value=False, description=description,
                            disabled=False)
    return widgets.Checkbox(**checkbox_options)
# One checkbox per available filter. The order matters: the checkbox at a
# given position is evaluated with the entry of 'checkbox_params' at that
# same position.
checkboxes = [
    make_cb(label)
    for label in ('M', '(px,py,pz)', 'η',
                  'One particle', 'Two particles', 'Four particles')
]
''' Search patterns used to filter the files, one entry per checkbox.
        Invariant mass:     the file contains the parameter 'M'
        Momentum:           the file contains 'py' or 'py1'.
                            "px" is not used, because there were
                            inconsistencies with it in the datafiles.
        Pseudorapidity:     the file contains 'eta'
        One particle:       the file contains 'Q'.
        Two particles:      the file contains 'Q2' but not 'Q4'
        Four particles:     the file contains 'Q4'
    A third item set to True marks the "first but not second" case.
'''
checkbox_params = [
    ['M'],
    ['py', 'py1'],
    ['eta', 'eta1'],
    ['Q'],
    ['Q2', 'Q4', True],
    ['Q4'],
]
# Output widget with a thin black border. The program output is shown in it
# and hidden again by clearing it.
out = widgets.Output(layout=dict(border='1px solid black'))
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
''' FUNCTIONS
The functions are divided in five parts:
1: name search
2: event search
3: parameter search
4: button operation
5: other
'''
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 1. Name search functions
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def handle_submit(sender):
    ''' Submit callback of the name-search text box. Runs a name search
        with the text currently held by the box; 'sender' is not used.
    '''
    query = t.value
    namesearch(query)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def namesearch(text):
    ''' Case-insensitive substring search over the file names.
        Clears the previous results, shows the name-search widgets again
        and creates one button for every file whose name contains 'text'.
    '''
    hide()
    display_name_widgets()
    for row, name in enumerate(filenames):
        if text.lower() in name.lower():
            create_buttons(row)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def display_name_widgets():
    ''' Prints the instructions for the name search, displays the text box
        and registers 'handle_submit' as its submit callback.
    '''
    instructions = (
        "Enter the filename or part of it and press ENTER",
        "See more information of a datafile by clicking the name of the file.",
        "Note that the file information will appear at the end of the output!",
    )
    for line in instructions:
        print(line)
    display(t)
    t.on_submit(handle_submit)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 2. Event search functions
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def search_events(sender):
    ''' Click callback of the 'Search' button: lists the files whose number
        of events lies between the given minimum and maximum (inclusive).

        Clears the previous results and shows the event-search widgets
        again. Then, inside the output widget, creates one button per
        matching file, labelled with the file name and its event count.
        If either bound is not a whole number, a short message is printed
        instead of letting the callback fail with a ValueError.

        Input:  sender, the widget that triggered the callback (unused)
    '''
    hide()
    display_event_widgets()
    with out:
        # Parse the bounds once, before the loop, rather than for every row.
        try:
            lower = int(min_event.value)
            upper = int(max_event.value)
        except ValueError:
            print("Min and Max must be whole numbers.")
            return
        for row, count in enumerate(events):
            if lower <= count <= upper:
                create_buttons(row, True)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def display_event_widgets():
    ''' Prints the instructions for the event search and displays every
        widget listed in 'event_widgets'.
    '''
    instructions = (
        "Choose the minimum and maximum number of events and click 'Search'.",
        "See more information of a datafile by clicking the name of the file.",
        "Note that the file information will appear at the end of the output!",
    )
    for line in instructions:
        print(line)
    for element in event_widgets:
        display(element)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 3. Parameter search functions
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
#
def check_params(paramlist):
    ''' Finds the database rows whose parameter string satisfies a filter.

        Input:  paramlist, a list of one to three items
                    paramlist[0]: pattern to look for
                    paramlist[1]: optional second pattern
                    paramlist[2]: optional flag, only meaningful together
                                  with a second pattern.
                                  Missing or False: a row matches if it
                                  contains either pattern.
                                  True: a row matches if it contains the
                                  first pattern but not the second.
        Output: match
                    list of positions in the global 'parameters' at which
                    the condition is met, in ascending order

        Matching is a plain substring test. Entries that are not strings
        (for example a NaN left by an empty csv cell) never match, instead
        of raising a TypeError.
    '''
    param1 = paramlist[0]
    # Explicit length checks replace the former bare 'except' clauses.
    param2 = paramlist[1] if len(paramlist) > 1 else None
    inverse = paramlist[2] if len(paramlist) > 2 else False

    match = []
    for i, entry in enumerate(parameters):
        if not isinstance(entry, str):
            continue
        if not param2:
            found = param1 in entry
        elif inverse:
            found = param1 in entry and param2 not in entry
        else:
            found = param1 in entry or param2 in entry
        if found:
            match.append(i)
    return match
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def parameter_search(change):
    ''' Observer callback of the parameter checkboxes.
        Clears the previous results, shows the checkboxes again and creates
        one button for every file that satisfies ALL checked filters.

        Input:  change, the change notification from the checkbox (unused)
    '''
    hide()
    display_checkboxes()
    # None means "no filter applied yet". An empty list is a genuine result
    # (nothing passes the filters checked so far) and must stay empty, so
    # it cannot double as the "not started" marker.
    chosen = None
    for box, params in zip(checkboxes, checkbox_params):
        if not box.value:
            continue
        hits = check_params(params)
        if chosen is None:
            chosen = hits
        else:
            allowed = set(hits)
            chosen = [row for row in chosen if row in allowed]
    for row in chosen or []:
        create_buttons(row)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def display_checkboxes():
    ''' Prints the instructions for the parameter search and displays every
        checkbox listed in 'checkboxes'.
    '''
    instructions = (
        "Filter by parameter. For the number of particles, choose only one at a time to avoid errors.",
        "See more information of a datafile by clicking the name of the file.",
        "Note that the file information will appear at the end of the output!",
    )
    for line in instructions:
        print(line)
    for checkbox in checkboxes:
        display(checkbox)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 4. Button operation functions
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def create_buttons(i, print_events=False):
    ''' Creates a toggle button for database row i, displays it and records
        it in the global lists 'buttons' and 'indeces'.

        Input:  i, the row in the database
                print_events, if True the button label is the file name
                followed by the number of events, otherwise the file
                name only
    '''
    label = filenames[i]
    if print_events:
        label = str(label + " " + str(events[i]))
    newbutton = widgets.ToggleButton(description=label,
                                     layout=Layout(width='initial'))
    display(newbutton)
    # 'button_click' is called whenever the button changes.
    newbutton.observe(button_click)
    # Both lists grow in step: position k of 'indeces' is the database row
    # that belongs to the button at position k of 'buttons'.
    buttons.append(newbutton)
    indeces.append(i)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def button_click(change):
    ''' Observer callback of the result buttons. Every button that is
        currently switched on is switched off again and the information of
        its file is printed. 'change' is not used.
    '''
    for position, button in enumerate(buttons):
        if not button.value:
            continue
        # Reset the toggle before printing (presumably so that the same
        # button can be clicked again later).
        button.value = False
        print_info(position)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 5. Other functions
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def hide():
    ''' Clears the output widget and empties the global lists 'buttons'
        and 'indeces'.
    '''
    out.clear_output()
    # Empty the lists in place. A plain assignment (buttons = []) inside
    # this function would only bind a local name, leaving the module-level
    # lists untouched and growing with every search.
    buttons.clear()
    indeces.clear()
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def print_info(i):
    ''' Prints the database information of the file that belongs to the
        button at position i of the global button list.
    '''
    # Translate the button position into the database row.
    row = indeces[i]
    print("Filename:", filenames[row])
    print("Events:", events[row])
    print("Parent dataset:", parent[row])
    print("Source:", sources[row])
    print("Parameters:", parameters[row], "\n")
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
def dropdown_menu(change):
    ''' Observer callback of the dropdown menu. When a new value is
        selected, the output is cleared, the widgets of the selected search
        are shown in the output widget and their callbacks are registered.
    '''
    # Ignore every notification that is not a change of the selected value.
    if not (change['type'] == 'change' and change['name'] == 'value'):
        return

    selection = change['new']
    if selection == 'Name':
        hide()
        with out:
            display_name_widgets()
    elif selection == 'Events':
        hide()
        with out:
            display_event_widgets()
        search_button.on_click(search_events)
    elif selection == 'Parameters':
        hide()
        with out:
            display_checkboxes()
        for checkbox in checkboxes:
            checkbox.observe(parameter_search)
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
''' Build the user interface and show it: the filter dropdown first, the
    output area below it.
'''
w = widgets.Dropdown(description='Filter by:', value=None,
                     options=['Name', 'Events', 'Parameters'])
w.observe(dropdown_menu)
display(w)
display(out)
# -----------------------------------------------------------------------------