alt text

Exam Scheduling with Graph Theory

In [1]:
#Running this cell displays a button to toggle hidden code
#From: http://chris-said.io/2016/02/13/how-to-make-polished-jupyter-presentations-with-optional-code-visibility/

from IPython.display import HTML

HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }
  
  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Out[1]:

This notebook takes data containing student ID numbers and what classes they take in order to create an optimal (minimum number of days) exam schedule with no conflicts. The conditions for this schedule are:

  • There are two sessions for exams on each day - AM and PM.
  • No student will write two exams on the same day.
  • The exam space is limited to 150 students at any time.

This notebook works with data where column 1 contains the grade of the student, column 2 contains student numbers, and column 3 contains class names.

Exam Scheduling Code

In [2]:
'''
Setting up the tools we'll be using in this notebook:
'''

import networkx as nx
import grinpy as gp
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from collections import OrderedDict

%matplotlib inline
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-2-538ec4c05161> in <module>()
      4 
      5 import networkx as nx
----> 6 import grinpy as gp
      7 import pandas as pd
      8 import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'grinpy'
In [ ]:
'''
Reading the data into the notebook:
'''

#Opening excel file of data:
data = pd.read_excel('files/Exam Scheduling Input Data.xlsx', header = None)

#Putting the data into a pandas dataframe:
data_df = pd.DataFrame(data)
data_df.columns =['Grade','Student ID','Class']

#Displaying the data:
data_df.head()
In [ ]:
'''
Creating a dictionary that lists each student and their classes:
'''

#Getting the list of unique students:
students = data.iloc[:, 1].tolist()
students = set(students)

#Creating a dictionary that will be in the form: {student1 : [class1, class2], student2 : [class2, class4] ...}
student_dictionary = {}

#Iterates through each row in the data:
for row in data.itertuples():
    
    #If this row is a student not yet added to the dictionary:
    if row[2] not in student_dictionary:
        
        #Add this student and the class for this row
        student_dictionary[row[2]] = [row[3]]
            
    #If the student already has a dictionary entry:
    else:
        
        #Add the class to the value list:
        student_dictionary[row[2]].append(row[3])
        
#Converting student_dictionary to a pandas dataframe:
student_data = pd.DataFrame([(k, v) for k, v in student_dictionary.items()])
student_data.columns =['Student ID','Classes']

#Displaying the dataframe:
student_data.head()
In [3]:
'''
Creating a dictionary that lists each class and the number of students in that class:
'''

#Getting the list of unique classes
classes = data.iloc[:, 2].tolist()
classes = set(classes)

#Create a dictionary that will be in the form: {class1: number of students in class1, ...}
class_dictionary = {}

#Iterates through each row in the data:
for row in data.itertuples():
    
    #If this row is a class not yet added to the dictionary:
    if row[3] not in class_dictionary:
        
        #Add one to the number of students in this class:
        class_dictionary[row[3]] = 1
        
    #If the student already has a dictionary entry:
    else:
        
        #Add one to the number of students in this class:
        class_dictionary[row[3]] += 1
        
#Converting class_dictionary to a pandas dataframe:
class_data = pd.DataFrame([(k, v) for k, v in class_dictionary.items()])
class_data.columns =['Class','Number of Students']

#Displaying the dataframe:
class_data.head()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-e272099a047b> in <module>()
      4 
      5 #Getting the list of unique classes
----> 6 classes = data.iloc[:, 2].tolist()
      7 classes = set(classes)
      8 

NameError: name 'data' is not defined
In [4]:
'''
Creating the conflict graph, and counting the number of nodes and edges:
'''

#Creating graph G
G = nx.Graph()

#Adding each unique class as a node in the graph
for item in classes:
    G.add_node(item)
    
#Iterates through the dictionary of students, adding conflict edges between all the classes each student has:
for student in student_dictionary:
    
    #Iterates through all pairs of classes for the student:
    for x in range(0, len(student_dictionary[student])):
        for y in range (x+1, len(student_dictionary[student])):
            
            #Creates edge for conflict:
            G.add_edge(student_dictionary[student][x], student_dictionary[student][y])
            
#Finding number of nodes and edges in the conflict graph
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()

G_data = {'Nodes': [node_count], 'Edges': [edge_count]}

G_df = pd.DataFrame(G_data)

#Displaying the dataframe:
display(HTML(G_df.to_html(index=False)))
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-1f22f210979d> in <module>()
      7 
      8 #Adding each unique class as a node in the graph
----> 9 for item in classes:
     10     G.add_node(item)
     11 

NameError: name 'classes' is not defined
In [7]:
'''
Draws the conflict graph:
'''

#Each node is a class, each edge is a scheduling conflict.
nx.draw_circular(G, with_labels=False, width = .5, node_color=range(29), node_size=800, cmap=plt.cm.Blues)
In [8]:
'''
Finds a possible graph colouring (not final). Colors are named 0, 1, 2, etc.
'''

sample_color = nx.greedy_color(G, strategy = 'random_sequential')

#Converting the colouring to a pandas dataframe:
color_data = pd.DataFrame([(k, v) for k, v in sample_color.items()])
color_data.columns =['Class','Colour']
In [9]:
'''
Computes and displays the final schedule:
'''

#Finding the chromatic number:
chro_num = gp.chromatic_number(G)

#The final solution will go into these lists:
final_solution_AM = []
final_solution_PM = []

#This variable is for the while loop below. If it is set to True, the loop will run.
#If it is set to False by the time the loop is completed, that means a working solution has been found.
#If it is set to True by the time the loop is completed, a solution was not found, and the loop will run again.
run = True

#This loop applies the random-greedy coloring algorithm until all conditions are met and a solution is found:
while run == True:
    
    #Resetting variables:
    final_solution_AM = []
    final_solution_PM = []
    run = False
    
    #Tries a random-greedy algorithm for coloring the graph:
    color = nx.greedy_color(G, strategy = 'random_sequential')

    #Checks that generated coloring has chromatic number of colors, sets loop to run again if it doesn't:
    for key in color:
        if color[key] >= chro_num:
            run = True
    
    #Checks that no single color has more than 300 students total taking an exam, sets loop to run again if failed:
    if run == False:
        
        student_count = []
        
        #Sets up student_count to be the appropriate size:
        for x in range(0, chro_num):
            student_count.append(0)
            
        for key in color:            
            student_count[color[key]] += class_dictionary[key]
            
        for x in student_count:
            if x >= 300:
                run = True
    
    
    #Tries to fill the AM/PM slots so that no more than 150 students are writing at a time:
    if run == False:
        
        #Setting up counts for the number of students in the gym during each time slot:
        student_count_AM = []
        student_count_PM = []
        
        #Setting up lists of classes for the solution (AM and PM slots), as well as the student count lists
        # so that they have one entry for every day.
        #The index will represent the day number (eg. the first set of classes will be for the first day, etc.)
        for x in range(0, chro_num):
            final_solution_AM.append([])
            final_solution_PM.append([])
            student_count_AM.append(0)
            student_count_PM.append(0)
        
        #Iterates through all the classes on a given day (or a given color),
        # in order to sort them into the AM and PM sessions:
        for x in range(0, chro_num): 
            for key in color:
                if color[key] == x:
                    
                    #Adds the class to the solution in the AM slot, if there is still room for it (less than 150 students):
                    if student_count_AM[x] + class_dictionary[key] <= 150:
                        
                        student_count_AM[x] += class_dictionary[key]
                        final_solution_AM[x].append(key)
                    
                    #If the AM session is full, the class is added to the PM slot:
                    else:
                        student_count_PM[x] += class_dictionary[key]
                        final_solution_PM[x].append(key)
            
            #If the PM session now has more than 150 students, this solution doesn't work, and the loop resets to try again:
            if student_count_PM[x] > 150:
                run = True            
            
color
Out[9]:
{'Science10': 0,
 'Calculus12': 0,
 'Physics11': 1,
 'Chemistry11Enriched': 2,
 'PreCalculus12': 3,
 'PreCalculus11': 3,
 'French9': 0,
 'BiologyLifeSciences11Enriched': 0,
 'Physics12': 1,
 'Chemistry11': 2,
 'Mathematics9': 1,
 'FoundationsofMathematics12': 0,
 'PreCalculus11Accelerated': 3,
 'Chemistry12': 2,
 'SocialStudies11': 4,
 'Science9': 2,
 'Science10Enriched': 0,
 'FoundationsofMathematics11': 0,
 'PreCalculus12Accelerated': 3,
 'Spanish9': 4,
 'English9': 5,
 'Law12': 4,
 'BiologyLifeSciences11': 5,
 'Geology12': 1,
 'History12': 6,
 'French9Enriched': 0,
 'FoundationsofMathPreCalc.10': 3,
 'SocialStudies9': 6,
 'BiologyAnatomyPhysiology12': 5}
In [10]:
#The rest of the code puts the schedule into a pandas dataframe, and applies some formatting:    
day_col = []
session_col = []
class_col = []
students_col = []

for day in range(0, chro_num):
    
    for class_ in final_solution_AM[day]:
        day_col.append("Day " + str(day+1) + ": " + str(student_count_AM[day]) + " / " + str(student_count_PM[day]) + " students")
        session_col.append("AM")
        class_col.append(class_)
        students_col.append(class_dictionary[class_])
    
    for class_ in final_solution_PM[day]:
        day_col.append("Day " + str(day+1) + ": " + str(student_count_AM[day]) + " / " + str(student_count_PM[day]) + " students")
        session_col.append("PM")
        class_col.append(class_)
        students_col.append(class_dictionary[class_])

d = {
    'Day': day_col,
    'Session': session_col,
    'Class': class_col,     
    'Students': students_col}

df = pd.DataFrame(d,columns=['Day','Session','Class','Students'])
df1=df.set_index(['Day', 'Session'])

df1
Out[10]:
Class Students
Day Session
Day 1: 146 / 104 students AM Science10 63
AM Calculus12 71
AM BiologyLifeSciences11Enriched 12
PM French9 35
PM FoundationsofMathematics12 18
PM Science10Enriched 22
PM FoundationsofMathematics11 12
PM French9Enriched 17
Day 2: 125 / 69 students AM Physics11 77
AM Physics12 34
AM Geology12 14
PM Mathematics9 69
Day 3: 133 / 124 students AM Chemistry11Enriched 56
AM Chemistry11 25
AM Chemistry12 52
PM Science9 124
Day 4: 144 / 127 students AM PreCalculus12 54
AM PreCalculus11 69
AM PreCalculus11Accelerated 21
PM PreCalculus12Accelerated 18
PM FoundationsofMathPreCalc.10 109
Day 5: 98 / 57 students AM SocialStudies11 40
AM Spanish9 58
PM Law12 57
Day 6: 141 / 43 students AM English9 117
AM BiologyLifeSciences11 24
PM BiologyAnatomyPhysiology12 43
Day 7: 48 / 124 students AM History12 48
PM SocialStudies9 124
In [ ]:
 

alt text