Author: Dee
Email: deetungsten@gmail.com
Date: December 12th, 2018
Version: 0.9
Purpose: Written for the PyTorch Facebook Challenge Student-Mentor Program. Automatically pairs students with mentors, optimizing the matches according to their experience level and language.
!pip install deap
!pip install pydrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Download every file in a Google Drive folder into ~/data on the Colab VM.

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Choose a local (Colab) directory to store the data.
local_download_path = os.path.expanduser('~/data')
# exist_ok=True tolerates a directory left over from a previous run without
# hiding genuine failures (e.g. permission errors) the way a bare
# `except: pass` did.
os.makedirs(local_download_path, exist_ok=True)

# 2. Auto-iterate using the query syntax
# https://developers.google.com/drive/v2/web/search-parameters
# Replace the placeholder below with the Drive folder ID; the single quotes
# around the ID are part of the query syntax and must be kept.
file_list = drive.ListFile(
    {'q': "'REPLACE FOLDER ID HERE (KEEP PARENTHESIS)' in parents"}).GetList()
for f in file_list:
    # 3. Create & download by id.
    print('title: %s, id: %s' % (f['title'], f['id']))
    fname = os.path.join(local_download_path, f['title'])
    print('downloading to {}'.format(fname))
    f_ = drive.CreateFile({'id': f['id']})
    f_.GetContentFile(fname)
print("Files successfully imported from Google Drive")
Imports the CSV files and loads them into pandas DataFrames.
Sets the importance weights of the traits (500 for a shared language, 200 for a mentor with a higher skill level than the student).
Goes through every student-mentor pair and assigns its relationship weight.
import random
import numpy as np
from deap import base, creator, tools
import pandas as pd
# Build the student x mentor relationship matrix: entry [s, m] is the score
# for pairing student s with mentor m.

# Points awarded to a pair: sharing a language is worth language_weight, and a
# mentor whose skill level is strictly above the student's is worth
# skill_weights.
language_weight = 500
skill_weights = 200
# Ordinal encoding of the 'Skill' column so levels can be compared.
skill = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}

students = pd.read_csv("/root/data/students.csv")
mentors = pd.read_csv("/root/data/mentors.csv")
print("List of Students")
display(students)
print("List of Mentors")
display(mentors)

number_of_students = len(students)
number_of_mentors = len(mentors)

# Mentor traits do not change per student, so look them up once instead of
# once per (student, mentor) pair.
mentor_languages = mentors['Language'].tolist()
mentor_levels = [skill[level] for level in mentors['Skill']]

relationship_m = np.zeros((number_of_students, number_of_mentors))
# enumerate() yields row *positions*, which is what the NumPy matrix needs;
# DataFrame index labels are only equal to positions for a default RangeIndex.
for index_s, (student_language, student_skill) in enumerate(
        zip(students['Language'], students['Skill'])):
    student_level = skill[student_skill]
    for index_m, (mentor_language, mentor_level) in enumerate(
            zip(mentor_languages, mentor_levels)):
        score = 0
        if student_language == mentor_language:
            score += language_weight
        skill_difference = mentor_level - student_level
        if skill_difference > 0:
            score += skill_weights
        relationship_m[index_s, index_m] = score

print("Relationship Matrix")
print(relationship_m)
List of Students
Name | Skill | Language | |
---|---|---|---|
0 | Test1 | Beginner | Chinese |
1 | Test2 | Intermediate | Italian |
2 | Test3 | Beginner | English |
3 | Test4 | Beginner | Vietnamese |
4 | Test5 | Intermediate | English |
List of Mentors
Name | Skill | Language | |
---|---|---|---|
0 | Test6 | Advanced | Chinese |
1 | Test7 | Advanced | Italian |
2 | Test8 | Intermediate | English |
3 | Test9 | Intermediate | Vietnamese |
4 | Test10 | Advanced | English |
[[700. 200. 200. 200. 200.] [200. 700. 0. 0. 200.] [200. 200. 700. 200. 700.] [200. 200. 200. 700. 200.] [200. 200. 500. 0. 700.]]
import array
import random
import json
import numpy
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
from random import shuffle
# DEAP setup: define the fitness/individual types and how individuals are
# generated. An individual is a list of two permutations,
# [student_order, mentor_order]; position i of each forms one pairing.

distance_map = relationship_m  # alias; not read by any code visible here
IND_SIZE = students.shape[0]
# The mentor permutation must range over the mentors, not the students.
# Sizing it by IND_SIZE produced out-of-range mentor indices when there are
# fewer mentors than students, and never considered surplus mentors when there
# are more. With equal counts this is identical to the previous behaviour.
MENTOR_SIZE = mentors.shape[0]

toolbox = base.Toolbox()
# NOTE: despite the name "FitnessMin", the positive weight means the fitness
# is maximised (a higher relationship score is better).
creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", list, typecode='i', fitness=creator.FitnessMin)

# Each generator returns a full random permutation of the respective indices.
toolbox.register("indices1", random.sample, range(IND_SIZE), IND_SIZE)
toolbox.register("indices2", random.sample, range(MENTOR_SIZE), MENTOR_SIZE)
# initCycle calls each generator once, giving [student_perm, mentor_perm].
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.indices1, toolbox.indices2))
def myMutation(individual):
    """Mutate an individual by reshuffling both of its permutations in place.

    ``individual[0]`` and ``individual[1]`` are each shuffled independently,
    first the former and then the latter. The same (mutated) object is
    returned wrapped in a one-element tuple.
    """
    for position in (0, 1):
        shuffle(individual[position])
    return (individual,)
# A population is a plain list of individuals produced by toolbox.individual;
# its size is supplied at call time, e.g. toolbox.population(n=MU).
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
def evalrel(individual):
    """Score an individual as the sum of its pairwise relationship weights.

    The i-th entries of ``individual[0]`` and ``individual[1]`` are used as
    the row and column of ``relationship_m`` respectively; pairing stops at
    the shorter of the two sequences. Returns a one-element tuple holding the
    total.
    """
    row_indices, column_indices = individual[0], individual[1]
    total_score = sum(
        relationship_m[row][column]
        for row, column in zip(row_indices, column_indices)
    )
    return (total_score,)
# Genetic operators used by the evolutionary loop.
toolbox.register("evaluate", evalrel)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", myMutation)
toolbox.register("select", tools.selBest)

# Seed before the population is created so runs are reproducible.
random.seed(169)

# Run parameters: generations, parents kept per generation, offspring produced
# per generation, and the crossover / mutation probabilities.
NGEN = 50
MU = 50
LAMBDA = 100
CXPB = 0.7
MUTPB = 0.2

pop = toolbox.population(n=MU)
hof = tools.ParetoFront()

# Per-generation fitness statistics; registration order sets the column order
# of the printed log.
stats = tools.Statistics(lambda ind: ind.fitness.values)
for stat_name, reducer in (
    ("avg", numpy.mean),
    ("std", numpy.std),
    ("min", numpy.min),
    ("max", numpy.max),
):
    stats.register(stat_name, reducer, axis=0)

# Left as a bare expression so the notebook displays the returned
# (population, logbook) pair as the cell output.
algorithms.eaMuPlusLambda(
    pop,
    toolbox,
    mu=MU,
    lambda_=LAMBDA,
    cxpb=CXPB,
    mutpb=MUTPB,
    ngen=NGEN,
    stats=stats,
    halloffame=hof,
)
gen nevals avg std min max 0 50 [1498.] [564.08864552] [600.] [2500.] 1 89 [2160.] [328.6335345] [1800.] [3500.] 2 87 [2428.] [249.03011866] [2100.] [3500.] 3 95 [2604.] [369.3020444] [2300.] [3500.] 4 90 [2732.] [394.68468427] [2500.] [3500.] 5 92 [2852.] [435.54104284] [2500.] [3500.] 6 89 [3024.] [434.30864601] [2500.] [3500.] 7 88 [3372.] [96.] [3300.] [3500.] 8 95 [3396.] [99.91996797] [3300.] [3500.] 9 91 [3456.] [82.84926071] [3300.] [3500.] 10 86 [3500.] [0.] [3500.] [3500.] 11 90 [3500.] [0.] [3500.] [3500.] 12 88 [3500.] [0.] [3500.] [3500.] 13 94 [3500.] [0.] [3500.] [3500.] 14 85 [3500.] [0.] [3500.] [3500.] 15 96 [3500.] [0.] [3500.] [3500.] 16 94 [3500.] [0.] [3500.] [3500.] 17 89 [3500.] [0.] [3500.] [3500.] 18 87 [3500.] [0.] [3500.] [3500.] 19 89 [3500.] [0.] [3500.] [3500.] 20 90 [3500.] [0.] [3500.] [3500.] 21 91 [3500.] [0.] [3500.] [3500.] 22 93 [3500.] [0.] [3500.] [3500.] 23 90 [3500.] [0.] [3500.] [3500.] 24 93 [3500.] [0.] [3500.] [3500.] 25 91 [3500.] [0.] [3500.] [3500.] 26 95 [3500.] [0.] [3500.] [3500.] 27 94 [3500.] [0.] [3500.] [3500.] 28 90 [3500.] [0.] [3500.] [3500.] 29 89 [3500.] [0.] [3500.] [3500.] 30 89 [3500.] [0.] [3500.] [3500.] 31 90 [3500.] [0.] [3500.] [3500.] 32 93 [3500.] [0.] [3500.] [3500.] 33 91 [3500.] [0.] [3500.] [3500.] 34 94 [3500.] [0.] [3500.] [3500.] 35 91 [3500.] [0.] [3500.] [3500.] 36 94 [3500.] [0.] [3500.] [3500.] 37 93 [3500.] [0.] [3500.] [3500.] 38 90 [3500.] [0.] [3500.] [3500.] 39 93 [3500.] [0.] [3500.] [3500.] 40 89 [3500.] [0.] [3500.] [3500.] 41 90 [3500.] [0.] [3500.] [3500.] 42 91 [3500.] [0.] [3500.] [3500.] 43 85 [3500.] [0.] [3500.] [3500.] 44 87 [3500.] [0.] [3500.] [3500.] 45 88 [3500.] [0.] [3500.] [3500.] 46 91 [3500.] [0.] [3500.] [3500.] 47 90 [3500.] [0.] [3500.] [3500.] 48 92 [3500.] [0.] [3500.] [3500.] 49 90 [3500.] [0.] [3500.] [3500.] 50 86 [3500.] [0.] [3500.] [3500.]
([[[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[0, 2, 3, 4, 1], [0, 2, 3, 4, 1]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 0, 2], [4, 3, 1, 0, 2]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 0, 2], [4, 3, 1, 0, 2]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[1, 0, 4, 2, 3], [1, 0, 4, 2, 3]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[0, 2, 3, 4, 1], [0, 2, 3, 4, 1]], [[0, 2, 3, 4, 1], [0, 2, 3, 4, 1]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 0, 2], [4, 3, 1, 0, 2]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]], [[4, 3, 1, 2, 0], [4, 3, 1, 2, 0]]], [{'avg': array([1498.]), 'gen': 0, 'max': array([2500.]), 'min': array([600.]), 'nevals': 50, 'std': array([564.08864552])}, {'avg': array([2160.]), 'gen': 1, 'max': array([3500.]), 'min': 
array([1800.]), 'nevals': 89, 'std': array([328.6335345])}, {'avg': array([2428.]), 'gen': 2, 'max': array([3500.]), 'min': array([2100.]), 'nevals': 87, 'std': array([249.03011866])}, {'avg': array([2604.]), 'gen': 3, 'max': array([3500.]), 'min': array([2300.]), 'nevals': 95, 'std': array([369.3020444])}, {'avg': array([2732.]), 'gen': 4, 'max': array([3500.]), 'min': array([2500.]), 'nevals': 90, 'std': array([394.68468427])}, {'avg': array([2852.]), 'gen': 5, 'max': array([3500.]), 'min': array([2500.]), 'nevals': 92, 'std': array([435.54104284])}, {'avg': array([3024.]), 'gen': 6, 'max': array([3500.]), 'min': array([2500.]), 'nevals': 89, 'std': array([434.30864601])}, {'avg': array([3372.]), 'gen': 7, 'max': array([3500.]), 'min': array([3300.]), 'nevals': 88, 'std': array([96.])}, {'avg': array([3396.]), 'gen': 8, 'max': array([3500.]), 'min': array([3300.]), 'nevals': 95, 'std': array([99.91996797])}, {'avg': array([3456.]), 'gen': 9, 'max': array([3500.]), 'min': array([3300.]), 'nevals': 91, 'std': array([82.84926071])}, {'avg': array([3500.]), 'gen': 10, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 86, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 11, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 12, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 88, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 13, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 94, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 14, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 85, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 15, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 96, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 16, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 94, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 17, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 89, 'std': array([0.])}, {'avg': array([3500.]), 
'gen': 18, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 87, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 19, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 89, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 20, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 21, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 22, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 93, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 23, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 24, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 93, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 25, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 26, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 95, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 27, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 94, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 28, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 29, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 89, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 30, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 89, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 31, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 32, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 93, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 33, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 34, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 94, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 35, 'max': 
array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 36, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 94, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 37, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 93, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 38, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 39, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 93, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 40, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 89, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 41, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 42, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 43, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 85, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 44, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 87, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 45, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 88, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 46, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 91, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 47, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 48, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 92, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 49, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 90, 'std': array([0.])}, {'avg': array([3500.]), 'gen': 50, 'max': array([3500.]), 'min': array([3500.]), 'nevals': 86, 'std': array([0.])}])
# Report the pairing stored in the last hall-of-fame entry, keeping only the
# pairs that share a language and whose mentor is at least as skilled as the
# student.
print("MATCHED RESULTS")
student_list = []
mentors_list = []
language_list = []
for student_idx, mentor_idx in zip(hof.items[-1][0], hof.items[-1][1]):
    flag_language = bool(
        students['Language'][student_idx] == mentors['Language'][mentor_idx])
    if not flag_language:
        print("Language Error")
    student_skill = skill[students['Skill'][student_idx]]
    # Look the mentor's skill up by the mentor's own index; this previously
    # used student_idx and therefore checked the wrong mentor.
    mentor_skill = skill[mentors['Skill'][mentor_idx]]
    flag_skill = mentor_skill >= student_skill
    if not flag_skill:
        print("Skill Error")
    if flag_language and flag_skill:
        student_list.append(students['Name'][student_idx])
        mentors_list.append(mentors['Name'][mentor_idx])
        language_list.append(mentors['Language'][mentor_idx])
# NOTE(review): the "Langugage" column name is misspelled; it is left as-is
# because renaming a column could break anything that reads this frame.
result = pd.DataFrame({'Students':student_list, "Mentors":mentors_list, "Langugage":language_list})
display(result)
MATCHED RESULTS
Langugage | Mentors | Students | |
---|---|---|---|
0 | Italian | Test7 | Test2 |
1 | Chinese | Test6 | Test1 |
2 | English | Test10 | Test5 |
3 | English | Test8 | Test3 |
4 | Vietnamese | Test9 | Test4 |