In this notebook you find the code developed for the experimental section of our paper [1].
The remainder of this notebook is organized as follows:
import sys, os, math, re, gzip, xml.etree.ElementTree, urllib.request, plotly
import pandas as pd
import numpy as np
import scipy.stats as stats
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from tqdm import tqdm_notebook
from scipy import signal
from bs4 import BeautifulSoup as bs
# Enable inline Plotly rendering for the figures drawn later in the notebook.
init_notebook_mode(connected=True)
From TREC (trec.nist.gov) we download the dataset. This dataset consists of:
However, to download the search results you must first request a username and password by following the instructions at this link.
# Download the judgments file once; later executions reuse the local copy.
if os.path.isfile('qrels.txt'):
    print('qrels.txt file exists')
else:
    url_qrels = "https://trec.nist.gov/data/session/2014/judgments.txt"
    urllib.request.urlretrieve(url_qrels, "qrels.txt")
    print(url_qrels)
https://trec.nist.gov/data/session/2014/judgments.txt
# Download the session -> topic mapping once; later executions reuse the
# local copy. No explicit removal is needed before the download: this branch
# only runs when 'session_to_topic.txt' is not an existing file.
if not os.path.isfile('session_to_topic.txt'):
    url_session_to_topic = "https://trec.nist.gov/data/session/2014/session-topic-mapping.txt"
    urllib.request.urlretrieve(url_session_to_topic, "session_to_topic.txt")
    print(url_session_to_topic)
else:
    print('session_to_topic.txt file exists')
https://trec.nist.gov/data/session/2014/session-topic-mapping.txt
if not os.path.isfile('sessions.xml'):
url_sessions = "https://trec.nist.gov/data/session/2014/sessiontrack2014.xml.gz"
urllib.request.urlretrieve(url_sessions, "sessions.xml.gz")
!gunzip sessions.xml.gz
print(url_sessions)
else:
print('sessions.xml file exists')
https://trec.nist.gov/data/session/2014/sessiontrack2014.xml.gz
username = "username"
password = "password"
if not os.path.isdir('session') or len(os.listdir('session')) == 0:
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
top_level_url = "https://trec.nist.gov/results"
password_mgr.add_password(None, top_level_url, username, password)
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
opener = urllib.request.build_opener(handler)
urllib.request.install_opener(opener)
!rm -f -r session
!mkdir session
runs_page = opener.open("https://trec.nist.gov/results/trec23/session.input.html")
soup = bs(runs_page)
for link in soup.findAll('a', attrs={'href': re.compile(".*\.gz")}):
run_path = link.get("href")
url_run = "https://trec.nist.gov/results/trec23/"+run_path
print(url_run)
urllib.request.urlretrieve(url_run, run_path)
else:
print('a session folder exists and is not empty')
https://trec.nist.gov/results/trec23/./session/input-ECxCGxPRF.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ECxCGxPRF.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ECxCGxPRF.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxOS.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxOS.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxOS.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxPRF.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxPRF.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ECxSRMxPRF.RL3.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run1.RL1.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run1.RL2.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run1.RL3.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run2.RL1.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run2.RL2.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run2.RL3.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run3.RL1.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run3.RL2.gz https://trec.nist.gov/results/trec23/./session/input-GUS14Run3.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER1.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER1.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER1.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER2.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER2.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER2.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER3.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER3.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ICTNET14SER3.RL3.gz https://trec.nist.gov/results/trec23/./session/input-RAMA.RL1.gz 
https://trec.nist.gov/results/trec23/./session/input-RAMA.RL2.gz https://trec.nist.gov/results/trec23/./session/input-RAMA.RL3.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamC.RL1.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamC.RL2.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamF.RL1.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamF.RL2.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamL.RL1.gz https://trec.nist.gov/results/trec23/./session/input-SCIAITeamL.RL2.gz https://trec.nist.gov/results/trec23/./session/input-UMASS1.RL1.gz https://trec.nist.gov/results/trec23/./session/input-UMASS1.RL2.gz https://trec.nist.gov/results/trec23/./session/input-UMASS1.RL3.gz https://trec.nist.gov/results/trec23/./session/input-UMASS2.RL1.gz https://trec.nist.gov/results/trec23/./session/input-UMASS2.RL2.gz https://trec.nist.gov/results/trec23/./session/input-UMASS2.RL3.gz https://trec.nist.gov/results/trec23/./session/input-UMASS3.RL1.gz https://trec.nist.gov/results/trec23/./session/input-UMASS3.RL2.gz https://trec.nist.gov/results/trec23/./session/input-UMASS3.RL3.gz https://trec.nist.gov/results/trec23/./session/input-UMASS4.RL1.gz https://trec.nist.gov/results/trec23/./session/input-UMASS4.RL2.gz https://trec.nist.gov/results/trec23/./session/input-UMASS4.RL3.gz https://trec.nist.gov/results/trec23/./session/input-WLZNTJU.RL1.gz https://trec.nist.gov/results/trec23/./session/input-WLZNTJU.RL2.gz https://trec.nist.gov/results/trec23/./session/input-WLZNTJU.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession1.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession1.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession1.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession2.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession2.RL2.gz 
https://trec.nist.gov/results/trec23/./session/input-ecnusession2.RL3.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession3.RL1.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession3.RL2.gz https://trec.nist.gov/results/trec23/./session/input-ecnusession3.RL3.gz https://trec.nist.gov/results/trec23/./session/input-uclbaseline.RL1.gz https://trec.nist.gov/results/trec23/./session/input-udel14Run1.RL1.gz https://trec.nist.gov/results/trec23/./session/input-udel14Run1.RL3.gz https://trec.nist.gov/results/trec23/./session/input-udelitu.RL1.gz https://trec.nist.gov/results/trec23/./session/input-webis2014act.RL1.gz https://trec.nist.gov/results/trec23/./session/input-webis2014act.RL2.gz https://trec.nist.gov/results/trec23/./session/input-webis2014act.RL3.gz https://trec.nist.gov/results/trec23/./session/input-webis2014db.RL1.gz https://trec.nist.gov/results/trec23/./session/input-webis2014db.RL2.gz https://trec.nist.gov/results/trec23/./session/input-webis2014db.RL3.gz https://trec.nist.gov/results/trec23/./session/input-webisSt14ax.RL1.gz https://trec.nist.gov/results/trec23/./session/input-webisSt14ax.RL2.gz https://trec.nist.gov/results/trec23/./session/input-webisSt14ax.RL3.gz
# qrels[topic][document][0] holds the integer relevance value read from qrels.txt.
# Fields are split on single spaces: column 0 is the topic, column 2 the
# document id and column 3 the relevance value.
qrels = {}
with open("qrels.txt") as file:
    for line in tqdm_notebook(file.readlines()):
        fields = line.split(" ")
        topic, document, rel = int(fields[0]), fields[2], int(fields[3])
        qrels.setdefault(topic, {}).setdefault(document, {})[0] = rel
HBox(children=(IntProgress(value=0, max=16949), HTML(value='')))
# session_qrels_remap[session] -> topic, read from session_to_topic.txt.
# Lines whose first field starts with '#' and lines with fewer than two
# whitespace-separated fields are skipped.
session_qrels_remap = {}
with open("session_to_topic.txt") as file:
    for line in tqdm_notebook(file.readlines()):
        # Raw string: "\s" is not a valid escape in a normal string literal
        elem = re.split(r"\s+", line.strip())
        if not elem[0].startswith('#') and len(elem) >= 2:
            session = int(elem[0])
            topic = int(elem[1])
            session_qrels_remap[session] = topic
HBox(children=(IntProgress(value=0, max=101), HTML(value='')))
Each session is represented by a list of strings having the following meaning:
q-{n}: The session has started. n is the number of judged relevant documents for the topic associated to this session;
r: The user has reformulated the query;
e-{r}: The user has examined the snippet or clicked on the document link. r is 1 when the document is relevant to the topic and 0 otherwise;
n-{r}: The user has not examined the snippet or clicked on the document link. r is 1 when the document is relevant to the topic and 0 otherwise;
f: The end of the session.
# Build, for every logged session, its list of event strings:
#   "q-{n}" / "q"   first interaction (n = number of judged relevant documents of the topic)
#   "r"             interaction of type "reformulate"
#   "e-{r}" / "e"   result at or above the examination cut-off (r = 1 if relevant, else 0)
#   "n-{r}" / "n"   result below the examination cut-off
#   "f"             end of the session
# The suffix-less forms are emitted when the session has no mapped topic (topic == 0).
xml_sessions = xml.etree.ElementTree.parse("sessions.xml").getroot()
sessions = {}
for xml_session in tqdm_notebook(xml_sessions.findall('session')):
    session = int(xml_session.get('num'))
    topic = 0
    if session in session_qrels_remap:
        topic = session_qrels_remap[session]
    max_reformulate = 0
    # Never populated (the add() below is disabled), so repeated relevant
    # documents are counted as relevant every time they are shown.
    rels_found = set()
    sessions[session] = []
    for xml_interaction in xml_session.findall('interaction'):
        interaction = int(xml_interaction.get("num"))
        interaction_type = xml_interaction.get("type")
        if interaction == 1:
            if topic > 0:
                n_rel = 0
                for doc in qrels[topic]:
                    if qrels[topic][doc][0] > 0:
                        n_rel += 1
                sessions[session].append("q-"+str(n_rel))
            else:
                sessions[session].append("q")
            max_reformulate = 0
        elif interaction_type == "reformulate":
            sessions[session].append("r")
            max_reformulate +=1

        # The result list is needed three times below; look it up once.
        xml_results = xml_interaction.find("results").findall("result")

        # rels[rank] -> 1 when the document shown at that rank is judged relevant
        rels = {}
        if topic > 0:
            for xml_result in xml_results:
                rank = int(xml_result.get("rank"))
                document = xml_result.find("clueweb12id").text
                rels[rank] = 0
                if document in qrels[topic] and document not in rels_found and qrels[topic][document][0] > 0:
                    #rels_found.add(document)
                    rels[rank] = 1

        # Examination cut-off: the rank of the first listed result by default,
        # overridden by the rank of the last listed click when clicks exist.
        max_rank = -1
        if xml_results:
            max_rank = int(xml_results[0].get("rank"))
        # Compare with None explicitly: truth-testing an Element is deprecated
        # and is False for an element without children.
        xml_clicked = xml_interaction.find("clicked")
        if xml_clicked is not None:
            for xml_click in xml_clicked.findall("click"):
                max_rank = int(xml_click.find("rank").text)

        for xml_result in xml_results:
            rank = int(xml_result.get("rank"))
            if topic > 0:
                if rank <= max_rank:
                    if rels[rank] > 0:
                        sessions[session].append("e-1")
                    else:
                        sessions[session].append("e-0")
                else:
                    if rels[rank] > 0:
                        sessions[session].append("n-1")
                    else:
                        sessions[session].append("n-0")
            else:
                if rank <= max_rank:
                    sessions[session].append("e")
                else:
                    sessions[session].append("n")

        # Backward pass over the events appended so far: once an "e" event has
        # been seen, every earlier "n" event is turned into "e", stopping at a
        # bare "q" or "r" marker.
        # NOTE(review): assumed to run once per interaction — confirm against
        # the original notebook layout.
        is_e = False
        i = len(sessions[session]) - 1
        while i >= 0:
            if sessions[session][i] == "q" or sessions[session][i] == "r":
                break
            elif sessions[session][i].startswith("e"):
                is_e = True
            elif is_e:
                sessions[session][i] = sessions[session][i].replace("n", "e")
            i-=1
    sessions[session].append("f")
HBox(children=(IntProgress(value=0, max=1257), HTML(value='')))
# runs[run_name][session] -> list of event strings for one submitted run:
# an "r" marker when the result at rank 1 is read, followed by "n-{rel}" for
# every ranked document (rel = 1 when the document is judged relevant).
# Only sessions that map to a topic with judgments are kept.
runs = {}
for path, dirs, files in os.walk('./session'):
    for run_name in tqdm_notebook(files):
        # Join with the directory being walked so files in sub-folders resolve too
        run_path = os.path.join(path, run_name)
        # Text mode replaces the former str(bytes) parsing; undecodable bytes
        # are replaced rather than aborting the whole run.
        with gzip.open(run_path, "rt", encoding="utf-8", errors="replace") as file:
            runs[run_name] = {}
            for line in file:
                elem = line.split()[0:4]
                if len(elem) < 4:
                    # Blank or truncated line: nothing to record
                    continue
                session = int(elem[0])
                topic = 0
                if session in session_qrels_remap:
                    topic = session_qrels_remap[session]
                if topic > 0 and topic in qrels:
                    if session not in runs[run_name]:
                        runs[run_name][session] = []
                    document = elem[2]
                    # Parsed as int: the rank used to stay a string, so the
                    # comparison with 1 below could never be true and the "r"
                    # marker was never emitted.
                    rank = int(elem[3])
                    rel = 0
                    if document in qrels[topic] and qrels[topic][document][0] > 0:
                        rel = 1
                    if rank == 1:
                        runs[run_name][session].append("r")
                    runs[run_name][session].append("n-" + str(rel))
HBox(children=(IntProgress(value=0, max=73), HTML(value='')))
# Pass 1: size the grid. m counts "r" events since the session start, n counts
# "e" events since the last "q"/"r"; track the largest value each one reaches.
max_reformulation = 0
max_rank = 0
for session, events in sessions.items():
    m = 0
    n = 0
    for event in events:
        if event.startswith("q"):
            m = 0
            n = 0
        if event.startswith("e"):
            n += 1
        if event == "r":
            m += 1
            n = 0
        max_reformulation = max(max_reformulation, m)
        max_rank = max(max_rank, n)

# Pass 2: count[m][n] = number of "e" events seen at position n after m reformulations.
count = [[0] * max_rank for _ in range(max_reformulation + 1)]
for session, events in sessions.items():
    m = 0
    n = 0
    for event in events:
        if event.startswith("q"):
            m = 0
            n = 0
        if event.startswith("e"):
            count[m][n] += 1
            n += 1
        if event == "r":
            m += 1
            n = 0

# Normalise the counts so that the whole grid sums to one.
df = pd.DataFrame(count)
df = df / df.values.sum()
gt = df.values
df.transpose()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.159816 | 0.096758 | 0.069824 | 0.046209 | 0.028593 | 0.014935 | 0.008552 | 0.004468 | 0.002042 | 0.001532 | 0.000766 | 0.000638 | 0.000511 | 0.000255 | 0.000128 |
1 | 0.042890 | 0.028976 | 0.015445 | 0.009574 | 0.005489 | 0.002553 | 0.001149 | 0.000638 | 0.000255 | 0.000128 | 0.000000 | 0.000128 | 0.000128 | 0.000128 | 0.000128 |
2 | 0.032550 | 0.021828 | 0.012254 | 0.008042 | 0.004468 | 0.001915 | 0.000894 | 0.000638 | 0.000255 | 0.000128 | 0.000000 | 0.000128 | 0.000128 | 0.000128 | 0.000128 |
3 | 0.024636 | 0.017871 | 0.009701 | 0.006255 | 0.003829 | 0.001532 | 0.000766 | 0.000511 | 0.000255 | 0.000128 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
4 | 0.019403 | 0.014680 | 0.008552 | 0.004978 | 0.003319 | 0.001404 | 0.000638 | 0.000383 | 0.000255 | 0.000128 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
5 | 0.016211 | 0.011999 | 0.007021 | 0.004212 | 0.002681 | 0.001149 | 0.000638 | 0.000255 | 0.000255 | 0.000128 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
6 | 0.013531 | 0.010212 | 0.006127 | 0.003702 | 0.002681 | 0.001149 | 0.000638 | 0.000255 | 0.000255 | 0.000000 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
7 | 0.011105 | 0.008552 | 0.005234 | 0.003319 | 0.002681 | 0.001021 | 0.000638 | 0.000255 | 0.000255 | 0.000000 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
8 | 0.008808 | 0.007276 | 0.004595 | 0.003319 | 0.002553 | 0.000894 | 0.000638 | 0.000128 | 0.000255 | 0.000000 | 0.000000 | 0.000000 | 0.000128 | 0.000000 | 0.000000 |
9 | 0.006255 | 0.005744 | 0.004212 | 0.003064 | 0.002553 | 0.000894 | 0.000511 | 0.000128 | 0.000255 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
10 | 0.004978 | 0.004468 | 0.003702 | 0.002808 | 0.002170 | 0.000894 | 0.000511 | 0.000128 | 0.000255 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
11 | 0.002042 | 0.001149 | 0.000766 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
12 | 0.001915 | 0.001149 | 0.000766 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
13 | 0.001787 | 0.001149 | 0.000766 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
14 | 0.001787 | 0.001149 | 0.000766 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
15 | 0.001787 | 0.001149 | 0.000766 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
16 | 0.001659 | 0.000894 | 0.000638 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
17 | 0.001659 | 0.000894 | 0.000638 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
18 | 0.001659 | 0.000766 | 0.000638 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
19 | 0.001532 | 0.000766 | 0.000638 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
20 | 0.001532 | 0.000638 | 0.000638 | 0.000766 | 0.000511 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
21 | 0.000511 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
22 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
23 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
24 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
26 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
27 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
28 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
29 | 0.000383 | 0.000255 | 0.000128 | 0.000128 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
31 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
32 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
33 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
34 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
35 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
36 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
37 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
38 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
39 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
40 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
41 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
42 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
43 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
44 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
45 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
46 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
47 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
48 | 0.000128 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
49 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
50 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
51 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
52 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
53 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
54 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
55 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
56 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
57 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
58 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
59 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
60 | 0.000000 | 0.000128 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
61 rows × 15 columns
We do the same, but this time treating every query as its own session, i.e. queries and reformulations are treated independently.
# count[n] = number of "e" events seen at position n, pooled over every query
# and reformulation (the reformulation counter m is tracked but not used as an index).
count = [0] * max_rank
for session, events in sessions.items():
    m = 0
    n = 0
    for event in events:
        if event.startswith("q"):
            m = 0
            n = 0
        if event.startswith("e"):
            count[n] += 1
            n += 1
        if event == "r":
            m += 1
            n = 0

# Normalise the counts so that they sum to one.
df = pd.DataFrame(count)
df = df / df.values.sum()
nt = df.values
df
0 | |
---|---|
0 | 0.435027 |
1 | 0.107608 |
2 | 0.083482 |
3 | 0.065611 |
4 | 0.053868 |
5 | 0.044677 |
6 | 0.038678 |
7 | 0.033189 |
8 | 0.028593 |
9 | 0.023615 |
10 | 0.019913 |
11 | 0.005361 |
12 | 0.005234 |
13 | 0.005106 |
14 | 0.005106 |
15 | 0.005106 |
16 | 0.004595 |
17 | 0.004595 |
18 | 0.004468 |
19 | 0.004340 |
20 | 0.004212 |
21 | 0.001276 |
22 | 0.001149 |
23 | 0.001149 |
24 | 0.001149 |
25 | 0.001149 |
26 | 0.001149 |
27 | 0.001149 |
28 | 0.001149 |
29 | 0.001149 |
... | ... |
31 | 0.000255 |
32 | 0.000255 |
33 | 0.000255 |
34 | 0.000255 |
35 | 0.000255 |
36 | 0.000255 |
37 | 0.000255 |
38 | 0.000255 |
39 | 0.000255 |
40 | 0.000255 |
41 | 0.000255 |
42 | 0.000255 |
43 | 0.000255 |
44 | 0.000255 |
45 | 0.000255 |
46 | 0.000255 |
47 | 0.000255 |
48 | 0.000255 |
49 | 0.000128 |
50 | 0.000128 |
51 | 0.000128 |
52 | 0.000128 |
53 | 0.000128 |
54 | 0.000128 |
55 | 0.000128 |
56 | 0.000128 |
57 | 0.000128 |
58 | 0.000128 |
59 | 0.000128 |
60 | 0.000128 |
61 rows × 1 columns
Here we define the discount functions for sDCG and sRBP.
def d_sdcg(bq, b, m, n):
    """Return the sDCG discount for position ``n`` of query ``m`` (both 0-based).

    Args:
        bq: Logarithm base applied to the query index.
        b: Logarithm base applied to the rank index.
        m: Query index.
        n: Rank index.

    Returns:
        ``1 / ((1 + log_bq(m + 1)) * log_b(n + 2))``.
    """
    query_discount = 1.0 + math.log(m + 1, bq)
    rank_discount = math.log(n + 2, b)
    return 1.0 / (query_discount * rank_discount)
def d_srbp(b, p, m, n):
    """Return the sRBP discount for position ``n`` of query ``m`` (both 0-based).

    Computed as ``((p - b*p) / (1 - b*p)) ** m * (b*p) ** n`` with ``0 ** 0``
    taken as 1. When ``b`` and ``p`` are both 1 the ratio is undefined, so the
    discount is ``(b*p) ** n`` for the first query and 0 for any later one.

    Args:
        b: First model parameter.
        p: Second model parameter.
        m: Query index.
        n: Rank index.

    Returns:
        The discount value.
    """
    def _power(base, exp):
        # Make the 0 ** 0 convention explicit
        return 1.0 if (base == 0 and exp == 0) else base ** exp

    bp = b * p
    if b == 1.0 and p == 1.0:
        # Guard against the 0 / 0 in the general formula
        return _power(bp, n) if m == 0 else 0.0
    query_term = _power((p - bp) / (1.0 - bp), m)
    return query_term * _power(bp, n)
We now fit the discount functions on the observed user behaviours using as a loss function the total squared error (TSE).
# Grid dimensions of the observed distribution gt (rows x columns).
M, N = gt.shape[0], gt.shape[1]
# State of the grid search: best error so far, number of steps per axis,
# best parameters so far, and the per-p list of errors.
min_err = sys.float_info.max
res = 100
best_p = best_b = -1
grid = {}
def tse_srbp(b, p):
    """Return the total squared error between ``gt`` and the sRBP discount.

    ``d_srbp(b, p, m, n)`` is evaluated on every cell of the M x N grid,
    divided by the sum over all cells, and compared cell by cell with ``gt``.

    Args:
        b: First parameter forwarded to ``d_srbp``.
        p: Second parameter forwarded to ``d_srbp``.

    Returns:
        Sum over all cells of ``(gt[m, n] - d_srbp(b, p, m, n) / norm) ** 2``.
    """
    # Evaluate every discount once and reuse it for the normaliser and the error
    discounts = [d_srbp(b, p, i // N, i % N) for i in range(M * N)]
    # Plain left-to-right accumulation keeps the result identical to the
    # two-loop formulation this replaces
    norm = 0.0
    for d in discounts:
        norm += d
    err = 0.0
    for i, d in enumerate(discounts):
        err += (gt[i // N, i % N] - d / norm) ** 2
    return err
# Exhaustive search over a (res+1) x (res+1) grid of (p, b) values in [0, 1].
# grid[p_i] collects the TSE for every b at the p with index p_i, so after the
# conversion below each DataFrame column corresponds to one p and each row to one b.
param_values = np.linspace(0, 1, res+1)
t = tqdm_notebook(param_values, desc = "TSE: {:.4f}".format(min_err))
for p_i, p in enumerate(t):
    column = []
    for b in param_values:
        err = tse_srbp(b, p)
        column.append(err)
        if err < min_err:
            min_err, best_p, best_b = err, p, b
            t.set_description("TSE: {:.4f}".format(min_err))
    grid[p_i] = column
grid = pd.DataFrame.from_dict(grid)
srbp_best_b = best_b
srbp_best_p = best_p
print("b =", best_b, ", p =", best_p, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
b = 0.64 , p = 0.86 , TSE = 0.0046
# Ridge 1: for every column index i of grid, find the row index j holding the
# smallest value. The point is recorded only when that minimum is unique
# within the column; coordinates are scaled back to [0, 1] by dividing by res.
x_min = []
y_min = []
z_min = []
g_min = sys.float_info.max
for i in range(grid.shape[0]):
    _z_min = sys.float_info.max
    for j in range(grid.shape[1]):
        # Strict '<' keeps the first occurrence of the minimum
        if grid[i][j] < _z_min:
            _z_min = grid[i][j]
            _x_min = i
            _y_min = j
    # Count how many cells of the column tie with the minimum
    count_min = 0
    for j in range(grid.shape[1]):
        if grid[i][j] == _z_min:
            count_min += 1
    if count_min == 1:
        x_min.append(_x_min/res)
        y_min.append(_y_min/res)
        z_min.append(_z_min)
        g_min = _z_min
x_min_p = x_min
y_min_p = y_min
z_min_p = z_min
g_min_p = g_min

# Ridge 2: same procedure with the roles swapped — for every row index j, find
# the column index i holding the smallest value, again keeping unique minima only.
x_min = []
y_min = []
z_min = []
g_min = sys.float_info.max
for j in range(grid.shape[0]):
    _z_min = sys.float_info.max
    for i in range(grid.shape[1]):
        # Strict '<' keeps the first occurrence of the minimum
        if grid[i][j] < _z_min:
            _z_min = grid[i][j]
            _x_min = i
            _y_min = j
    # Count how many cells of the row tie with the minimum
    count_min = 0
    for i in range(grid.shape[1]):
        if grid[i][j] == _z_min:
            count_min += 1
    if count_min == 1:# and _z_min < g_min:
        x_min.append(_x_min/res)
        y_min.append(_y_min/res)
        z_min.append(_z_min)
        g_min = _z_min
x_min_b = x_min
y_min_b = y_min
z_min_b = z_min
g_min_b = g_min
# 3-D view of the TSE surface with the two smoothed ridges of minima drawn on top.
axis_values = np.linspace(0, 1, res+1)
ridge_line = dict(color='black', width=5)
surface = go.Surface(x=axis_values, y=np.linspace(0, 1, res+1), z=grid.values,
                     colorscale='Viridis', reversescale=True)
ridge_p = go.Scatter3d(x=signal.savgol_filter(x_min_p, 7, 1),
                       y=signal.savgol_filter(y_min_p, 7, 1),
                       z=z_min_p,
                       mode='lines',
                       line=ridge_line,
                       showlegend=False)
ridge_b = go.Scatter3d(x=signal.savgol_filter(x_min_b, 7, 1),
                       y=signal.savgol_filter(y_min_b, 7, 1),
                       z=z_min_b,
                       mode='lines',
                       line=dict(color='black', width=5),
                       showlegend=False)
data = [surface, ridge_p, ridge_b]

# All three axes share the same background and grid colours; only the title differs.
axis_style = dict(showbackground=True, backgroundcolor='rgb(230, 230,230)',
                  gridcolor='rgb(255, 255, 255)', zerolinecolor='rgb(255, 255, 255)')
layout = go.Layout(
    scene=dict(
        xaxis=dict(title='p', **axis_style),
        yaxis=dict(title='b', **axis_style),
        zaxis=dict(title='TSE', **axis_style)))
fig = go.Figure(data=data, layout=layout)
iplot(fig)
best_p_i = -1
best_b_i = -1
for p_i, p in enumerate(np.linspace(0, 1, res+1)):
for b_i, b in enumerate(np.linspace(0, 1, res+1)):
if min_err == grid.iat[p_i, b_i]:
best_p_i = p_i
best_b_i = b_i
def plot_line(name_x, x, name_y, y):
    """Display a single line chart of y against x in the notebook.

    name_x and name_y are used as the titles of the x and y axes.
    """
    x_axis = go.layout.XAxis(title=go.layout.xaxis.Title(text=name_x))
    y_axis = go.layout.YAxis(title=go.layout.yaxis.Title(text=name_y))
    figure = go.Figure(
        data=[go.Scatter(x=x, y=y)],
        layout=go.Layout(xaxis=x_axis, yaxis=y_axis))
    iplot(figure)
# Two one-dimensional cuts through the TSE grid at the indices found above:
# row best_p_i (x axis labelled "b") and column best_b_i (x axis labelled "p").
# NOTE(review): which parameter actually varies along a row depends on how grid
# was built -- confirm that the axis labels match the grid orientation.
x = np.linspace(0, 1, res+1)
y = grid.values[best_p_i]
plot_line("b", x, "TSE", y)
x = np.linspace(0, 1, res+1)
y = grid.values[:, best_b_i]
plot_line("p", x, "TSE", y)
# Evaluate d_srbp at the best (b, p) for every (m, n) with m in range(M) and
# n in range(N), then divide by the grand total so the entries sum to one.
dist_srbp = []
norm = 0.0
for m in range(M):
    row = []
    for n in range(N):
        weight = d_srbp(srbp_best_b, srbp_best_p, m, n)
        row.append(weight)
        norm += weight
    dist_srbp.append(row)
dist_srbp = pd.DataFrame(dist_srbp) / norm
# Displayed transposed: one column per m, one row per n.
dist_srbp.transpose()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.405216e-01 | 9.676489e-02 | 6.663348e-02 | 4.588462e-02 | 3.159670e-02 | 2.175787e-02 | 1.498273e-02 | 1.031729e-02 | 7.104612e-03 | 4.892322e-03 | 3.368912e-03 | 2.319874e-03 | 1.597493e-03 | 1.100053e-03 | 7.575098e-04 |
1 | 7.734311e-02 | 5.325940e-02 | 3.667507e-02 | 2.525489e-02 | 1.739083e-02 | 1.197553e-02 | 8.246497e-03 | 5.678638e-03 | 3.910379e-03 | 2.692734e-03 | 1.854249e-03 | 1.276858e-03 | 8.792602e-04 | 6.054692e-04 | 4.169334e-04 |
2 | 4.256965e-02 | 2.931397e-02 | 2.018596e-02 | 1.390029e-02 | 9.571910e-03 | 6.591333e-03 | 4.538872e-03 | 3.125522e-03 | 2.152272e-03 | 1.482081e-03 | 1.020579e-03 | 7.027829e-04 | 4.839448e-04 | 3.332502e-04 | 2.294801e-04 |
3 | 2.343033e-02 | 1.613441e-02 | 1.111035e-02 | 7.650722e-03 | 5.268380e-03 | 3.627870e-03 | 2.498195e-03 | 1.720287e-03 | 1.184611e-03 | 8.157373e-04 | 5.617265e-04 | 3.868117e-04 | 2.663632e-04 | 1.834209e-04 | 1.263059e-04 |
4 | 1.289606e-02 | 8.880380e-03 | 6.115137e-03 | 4.210957e-03 | 2.899716e-03 | 1.996780e-03 | 1.375007e-03 | 9.468462e-04 | 6.520097e-04 | 4.489818e-04 | 3.091743e-04 | 2.129012e-04 | 1.466063e-04 | 1.009549e-04 | 6.951875e-05 |
5 | 7.097989e-03 | 4.887761e-03 | 3.365771e-03 | 2.317711e-03 | 1.596004e-03 | 1.099027e-03 | 7.568036e-04 | 5.211441e-04 | 3.588662e-04 | 2.471196e-04 | 1.701695e-04 | 1.171808e-04 | 8.069212e-05 | 5.556557e-05 | 3.826312e-05 |
6 | 3.906733e-03 | 2.690224e-03 | 1.852521e-03 | 1.275668e-03 | 8.784405e-04 | 6.049047e-04 | 4.165447e-04 | 2.868377e-04 | 1.975199e-04 | 1.360146e-04 | 9.366131e-05 | 6.449631e-05 | 4.441294e-05 | 3.058329e-05 | 2.106002e-05 |
7 | 2.150266e-03 | 1.480699e-03 | 1.019627e-03 | 7.021277e-04 | 4.834936e-04 | 3.329396e-04 | 2.292662e-04 | 1.578755e-04 | 1.087150e-04 | 7.486244e-05 | 5.155118e-05 | 3.549877e-05 | 2.444488e-05 | 1.683304e-05 | 1.159144e-05 |
8 | 1.183506e-03 | 8.149768e-04 | 5.612029e-04 | 3.864511e-04 | 2.661149e-04 | 1.832499e-04 | 1.261881e-04 | 8.689467e-05 | 5.983672e-05 | 4.120429e-05 | 2.837377e-05 | 1.953852e-05 | 1.345446e-05 | 9.264906e-06 | 6.379926e-06 |
9 | 6.514019e-04 | 4.485632e-04 | 3.088861e-04 | 2.127027e-04 | 1.464696e-04 | 1.008608e-04 | 6.945394e-05 | 4.782683e-05 | 3.293413e-05 | 2.267884e-05 | 1.561692e-05 | 1.075400e-05 | 7.405337e-06 | 5.099404e-06 | 3.511512e-06 |
10 | 3.585316e-04 | 2.468892e-04 | 1.700109e-04 | 1.170716e-04 | 8.061689e-05 | 5.551376e-05 | 3.822745e-05 | 2.632388e-05 | 1.812695e-05 | 1.248243e-05 | 8.595555e-06 | 5.919003e-06 | 4.075897e-06 | 2.806712e-06 | 1.932736e-06 |
11 | 1.973358e-04 | 1.358878e-04 | 9.357399e-05 | 6.443618e-05 | 4.437154e-05 | 3.055478e-05 | 2.104039e-05 | 1.448867e-05 | 9.977071e-06 | 6.870332e-06 | 4.730994e-06 | 3.257819e-06 | 2.243374e-06 | 1.544814e-06 | 1.063778e-06 |
12 | 1.086136e-04 | 7.479265e-05 | 5.150313e-05 | 3.546568e-05 | 2.442209e-05 | 1.681735e-05 | 1.158063e-05 | 7.974562e-06 | 5.491380e-06 | 3.781431e-06 | 2.603939e-06 | 1.793104e-06 | 1.234753e-06 | 8.502658e-07 | 5.855033e-07 |
13 | 5.978094e-05 | 4.116588e-05 | 2.834732e-05 | 1.952031e-05 | 1.344192e-05 | 9.256269e-06 | 6.373979e-06 | 4.389199e-06 | 3.022455e-06 | 2.081299e-06 | 1.433208e-06 | 9.869243e-07 | 6.796080e-07 | 4.679863e-07 | 3.222610e-07 |
14 | 3.290343e-05 | 2.265770e-05 | 1.560236e-05 | 1.074398e-05 | 7.398433e-06 | 5.094650e-06 | 3.508238e-06 | 2.415815e-06 | 1.663559e-06 | 1.145547e-06 | 7.888376e-07 | 5.432031e-07 | 3.740563e-07 | 2.575797e-07 | 1.773725e-07 |
15 | 1.811005e-05 | 1.247080e-05 | 8.587542e-06 | 5.913485e-06 | 4.072097e-06 | 2.804096e-06 | 1.930934e-06 | 1.329665e-06 | 9.156231e-07 | 6.305092e-07 | 4.341762e-07 | 2.989790e-07 | 2.058806e-07 | 1.417718e-07 | 9.762581e-08 |
16 | 9.967769e-06 | 6.863927e-06 | 4.726583e-06 | 3.254782e-06 | 2.241282e-06 | 1.543374e-06 | 1.062786e-06 | 7.318474e-07 | 5.039590e-07 | 3.470322e-07 | 2.389706e-07 | 1.645580e-07 | 1.133167e-07 | 7.803123e-08 | 5.373325e-08 |
17 | 5.486260e-06 | 3.777905e-06 | 2.601511e-06 | 1.791432e-06 | 1.233602e-06 | 8.494731e-07 | 5.849575e-07 | 4.028088e-07 | 2.773790e-07 | 1.910065e-07 | 1.315294e-07 | 9.057275e-08 | 6.236949e-08 | 4.294839e-08 | 2.957478e-08 |
18 | 3.019638e-06 | 2.079359e-06 | 1.431872e-06 | 9.860042e-07 | 6.789745e-07 | 4.675500e-07 | 3.219606e-07 | 2.217060e-07 | 1.526694e-07 | 1.051300e-07 | 7.239379e-08 | 4.985124e-08 | 3.432817e-08 | 2.363879e-08 | 1.627796e-08 |
19 | 1.662009e-06 | 1.144479e-06 | 7.881022e-07 | 5.426967e-07 | 3.737075e-07 | 2.573395e-07 | 1.772071e-07 | 1.220270e-07 | 8.402924e-08 | 5.786355e-08 | 3.984554e-08 | 2.743812e-08 | 1.889422e-08 | 1.301079e-08 | 8.959388e-09 |
20 | 9.147695e-07 | 6.299214e-07 | 4.337715e-07 | 2.987003e-07 | 2.056886e-07 | 1.416397e-07 | 9.753480e-08 | 6.716364e-08 | 4.624970e-08 | 3.184810e-08 | 2.193099e-08 | 1.510194e-08 | 1.039938e-08 | 7.161139e-09 | 4.931247e-09 |
21 | 5.034891e-07 | 3.467087e-07 | 2.387478e-07 | 1.644046e-07 | 1.132110e-07 | 7.795848e-08 | 5.368315e-08 | 3.696687e-08 | 2.545583e-08 | 1.752919e-08 | 1.207082e-08 | 8.312109e-09 | 5.723819e-09 | 3.941491e-09 | 2.714158e-09 |
22 | 2.771204e-07 | 1.908285e-07 | 1.314068e-07 | 9.048831e-08 | 6.231135e-08 | 4.290835e-08 | 2.954721e-08 | 2.034656e-08 | 1.401089e-08 | 9.648069e-09 | 6.643777e-09 | 4.574985e-09 | 3.150390e-09 | 2.169397e-09 | 1.493873e-09 |
23 | 1.525271e-07 | 1.050320e-07 | 7.232630e-08 | 4.980477e-08 | 3.429616e-08 | 2.361675e-08 | 1.626278e-08 | 1.119875e-08 | 7.711594e-09 | 5.310297e-09 | 3.656735e-09 | 2.518072e-09 | 1.733975e-09 | 1.194036e-09 | 8.222276e-10 |
24 | 8.395091e-08 | 5.780961e-08 | 3.980840e-08 | 2.741254e-08 | 1.887661e-08 | 1.299866e-08 | 8.951036e-09 | 6.163791e-09 | 4.244461e-09 | 2.922787e-09 | 2.012667e-09 | 1.385947e-09 | 9.543797e-10 | 6.571974e-10 | 4.525541e-10 |
25 | 4.620658e-08 | 3.181841e-08 | 2.191054e-08 | 1.508786e-08 | 1.038969e-08 | 7.154463e-09 | 4.926650e-09 | 3.392551e-09 | 2.336152e-09 | 1.608702e-09 | 1.107772e-09 | 7.628251e-10 | 5.252906e-10 | 3.617214e-10 | 2.490858e-10 |
26 | 2.543210e-08 | 1.751285e-08 | 1.205956e-08 | 8.304360e-09 | 5.718483e-09 | 3.937817e-09 | 2.711628e-09 | 1.867260e-09 | 1.285818e-09 | 8.854297e-10 | 6.097176e-10 | 4.198589e-10 | 2.891199e-10 | 1.990915e-10 | 1.370968e-10 |
27 | 1.399783e-08 | 9.639074e-09 | 6.637583e-09 | 4.570720e-09 | 3.147453e-09 | 2.167374e-09 | 1.492480e-09 | 1.027740e-09 | 7.077141e-10 | 4.873405e-10 | 3.355886e-10 | 2.310903e-10 | 1.591316e-10 | 1.095800e-10 | 7.545808e-11 |
28 | 7.704405e-09 | 5.305346e-09 | 3.653326e-09 | 2.515724e-09 | 1.732358e-09 | 1.192923e-09 | 8.214611e-10 | 5.656680e-10 | 3.895258e-10 | 2.682322e-10 | 1.847079e-10 | 1.271921e-10 | 8.758604e-11 | 6.031281e-11 | 4.153213e-11 |
29 | 4.240504e-09 | 2.920063e-09 | 2.010790e-09 | 1.384655e-09 | 9.534899e-10 | 6.565847e-10 | 4.521322e-10 | 3.113437e-10 | 2.143950e-10 | 1.476350e-10 | 1.016633e-10 | 7.000655e-11 | 4.820736e-11 | 3.319617e-11 | 2.285928e-11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
31 | 1.284619e-09 | 8.846042e-10 | 6.091492e-10 | 4.194675e-10 | 2.888504e-10 | 1.989059e-10 | 1.369690e-10 | 9.431851e-11 | 6.494886e-11 | 4.472457e-11 | 3.079788e-11 | 2.120779e-11 | 1.460394e-11 | 1.005645e-11 | 6.924995e-12 |
32 | 7.070543e-10 | 4.868862e-10 | 3.352757e-10 | 2.308749e-10 | 1.589833e-10 | 1.094778e-10 | 7.538773e-11 | 5.191291e-11 | 3.574785e-11 | 2.461641e-11 | 1.695115e-11 | 1.167277e-11 | 8.038011e-12 | 5.535071e-12 | 3.811517e-12 |
33 | 3.891627e-10 | 2.679821e-10 | 1.845357e-10 | 1.270735e-10 | 8.750438e-11 | 6.025658e-11 | 4.149341e-11 | 2.857286e-11 | 1.967562e-11 | 1.354887e-11 | 9.329915e-12 | 6.424693e-12 | 4.424121e-12 | 3.046503e-12 | 2.097859e-12 |
34 | 2.141952e-10 | 1.474974e-10 | 1.015685e-10 | 6.994128e-11 | 4.816241e-11 | 3.316522e-11 | 2.283797e-11 | 1.572650e-11 | 1.082946e-11 | 7.457298e-12 | 5.135185e-12 | 3.536151e-12 | 2.435036e-12 | 1.676795e-12 | 1.154662e-12 |
35 | 1.178930e-10 | 8.118255e-11 | 5.590329e-11 | 3.849568e-11 | 2.650859e-11 | 1.825414e-11 | 1.257002e-11 | 8.655868e-12 | 5.960535e-12 | 4.104497e-12 | 2.826406e-12 | 1.946297e-12 | 1.340244e-12 | 9.229082e-13 | 6.355258e-13 |
36 | 6.488831e-11 | 4.468288e-11 | 3.076917e-11 | 2.118802e-11 | 1.459033e-11 | 1.004708e-11 | 6.918539e-12 | 4.764190e-12 | 3.280679e-12 | 2.259115e-12 | 1.555654e-12 | 1.071242e-12 | 7.376703e-13 | 5.079687e-13 | 3.497934e-13 |
37 | 3.571453e-11 | 2.459346e-11 | 1.693535e-11 | 1.166189e-11 | 8.030517e-12 | 5.529911e-12 | 3.807964e-12 | 2.622210e-12 | 1.805685e-12 | 1.243417e-12 | 8.562319e-13 | 5.896117e-13 | 4.060137e-13 | 2.795860e-13 | 1.925263e-13 |
38 | 1.965728e-11 | 1.353624e-11 | 9.321217e-12 | 6.418703e-12 | 4.419997e-12 | 3.043663e-12 | 2.095903e-12 | 1.443264e-12 | 9.938493e-13 | 6.843767e-13 | 4.712700e-13 | 3.245223e-13 | 2.234700e-13 | 1.538841e-13 | 1.059665e-13 |
39 | 1.081936e-11 | 7.450346e-12 | 5.130398e-12 | 3.532854e-12 | 2.432766e-12 | 1.675232e-12 | 1.153585e-12 | 7.943727e-13 | 5.470147e-13 | 3.766809e-13 | 2.593870e-13 | 1.786170e-13 | 1.229979e-13 | 8.469782e-14 | 5.832394e-14 |
40 | 5.954978e-12 | 4.100670e-12 | 2.823771e-12 | 1.944483e-12 | 1.338994e-12 | 9.220478e-13 | 6.349333e-13 | 4.372227e-13 | 3.010769e-13 | 2.073252e-13 | 1.427666e-13 | 9.831082e-14 | 6.769802e-14 | 4.661768e-14 | 3.210150e-14 |
41 | 3.277620e-12 | 2.257009e-12 | 1.554204e-12 | 1.070243e-12 | 7.369826e-13 | 5.074951e-13 | 3.494673e-13 | 2.406474e-13 | 1.657127e-13 | 1.141118e-13 | 7.857875e-14 | 5.411028e-14 | 3.726099e-14 | 2.565837e-14 | 1.766866e-14 |
42 | 1.804002e-12 | 1.242258e-12 | 8.554337e-13 | 5.890620e-13 | 4.056352e-13 | 2.793253e-13 | 1.923468e-13 | 1.324523e-13 | 9.120827e-14 | 6.280712e-14 | 4.324974e-14 | 2.978230e-14 | 2.050845e-14 | 1.412237e-14 | 9.724833e-15 |
43 | 9.929228e-13 | 6.837386e-13 | 4.708307e-13 | 3.242197e-13 | 2.232616e-13 | 1.537406e-13 | 1.058677e-13 | 7.290176e-14 | 5.020103e-14 | 3.456904e-14 | 2.380466e-14 | 1.639218e-14 | 1.128785e-14 | 7.772951e-15 | 5.352548e-15 |
44 | 5.465047e-13 | 3.763297e-13 | 2.591452e-13 | 1.784505e-13 | 1.228832e-13 | 8.461885e-14 | 5.826957e-14 | 4.012513e-14 | 2.763065e-14 | 1.902680e-14 | 1.310208e-14 | 9.022254e-15 | 6.212833e-15 | 4.278232e-15 | 2.946042e-15 |
45 | 3.007962e-13 | 2.071319e-13 | 1.426335e-13 | 9.821917e-14 | 6.763491e-14 | 4.657422e-14 | 3.207157e-14 | 2.208487e-14 | 1.520791e-14 | 1.047235e-14 | 7.211387e-15 | 4.965848e-15 | 3.419543e-15 | 2.354739e-15 | 1.621502e-15 |
46 | 1.655582e-13 | 1.140054e-13 | 7.850549e-14 | 5.405983e-14 | 3.722625e-14 | 2.563445e-14 | 1.765219e-14 | 1.215551e-14 | 8.370433e-15 | 5.763982e-15 | 3.969147e-15 | 2.733203e-15 | 1.882117e-15 | 1.296048e-15 | 8.924745e-16 |
47 | 9.112324e-14 | 6.274857e-14 | 4.320942e-14 | 2.975453e-14 | 2.048933e-14 | 1.410920e-14 | 9.715766e-15 | 6.690394e-15 | 4.607086e-15 | 3.172495e-15 | 2.184619e-15 | 1.504355e-15 | 1.035917e-15 | 7.133450e-16 | 4.912180e-16 |
48 | 5.015423e-14 | 3.453681e-14 | 2.378247e-14 | 1.637689e-14 | 1.127733e-14 | 7.765704e-15 | 5.347558e-15 | 3.682393e-15 | 2.535740e-15 | 1.746142e-15 | 1.202414e-15 | 8.279969e-16 | 5.701687e-16 | 3.926251e-16 | 2.703664e-16 |
49 | 2.760489e-14 | 1.900906e-14 | 1.308987e-14 | 9.013842e-15 | 6.207041e-15 | 4.274244e-15 | 2.943296e-15 | 2.026789e-15 | 1.395672e-15 | 9.610763e-16 | 6.618088e-16 | 4.557295e-16 | 3.138209e-16 | 2.161008e-16 | 1.488097e-16 |
50 | 1.519373e-14 | 1.046259e-14 | 7.204664e-15 | 4.961219e-15 | 3.416355e-15 | 2.352544e-15 | 1.619990e-15 | 1.115545e-15 | 7.681776e-16 | 5.289764e-16 | 3.642595e-16 | 2.508335e-16 | 1.727270e-16 | 1.189419e-16 | 8.190483e-17 |
51 | 8.362630e-15 | 5.758608e-15 | 3.965447e-15 | 2.730655e-15 | 1.880362e-15 | 1.294840e-15 | 8.916425e-16 | 6.139958e-16 | 4.228049e-16 | 2.911486e-16 | 2.004885e-16 | 1.380588e-16 | 9.506894e-17 | 6.546562e-17 | 4.508042e-17 |
52 | 4.602791e-15 | 3.169538e-15 | 2.182582e-15 | 1.502952e-15 | 1.034951e-15 | 7.126799e-16 | 4.907600e-16 | 3.379433e-16 | 2.327118e-16 | 1.602482e-16 | 1.103488e-16 | 7.598755e-17 | 5.232595e-17 | 3.603228e-17 | 2.481226e-17 |
53 | 2.533376e-15 | 1.744514e-15 | 1.201293e-15 | 8.272250e-16 | 5.696372e-16 | 3.922590e-16 | 2.701143e-16 | 1.860040e-16 | 1.280846e-16 | 8.820060e-17 | 6.073600e-17 | 4.182355e-17 | 2.880020e-17 | 1.983217e-17 | 1.365667e-17 |
54 | 1.394370e-15 | 9.601803e-16 | 6.611918e-16 | 4.553047e-16 | 3.135283e-16 | 2.158994e-16 | 1.486709e-16 | 1.023766e-16 | 7.049776e-17 | 4.854561e-17 | 3.342910e-17 | 2.301968e-17 | 1.585163e-17 | 1.091562e-17 | 7.516631e-18 |
55 | 7.674614e-16 | 5.284832e-16 | 3.639200e-16 | 2.505997e-16 | 1.725660e-16 | 1.188310e-16 | 8.182848e-17 | 5.634808e-17 | 3.880197e-17 | 2.671951e-17 | 1.839937e-17 | 1.267003e-17 | 8.724737e-18 | 6.007960e-18 | 4.137154e-18 |
56 | 4.224108e-16 | 2.908772e-16 | 2.003015e-16 | 1.379301e-16 | 9.498031e-17 | 6.540459e-17 | 4.503839e-17 | 3.101398e-17 | 2.135660e-17 | 1.470642e-17 | 1.012702e-17 | 6.973586e-18 | 4.802095e-18 | 3.306781e-18 | 2.277089e-18 |
57 | 2.324949e-16 | 1.600988e-16 | 1.102460e-16 | 7.591671e-17 | 5.227716e-17 | 3.599869e-17 | 2.478913e-17 | 1.707010e-17 | 1.175467e-17 | 8.094411e-18 | 5.573909e-18 | 3.838262e-18 | 2.643073e-18 | 1.820052e-18 | 1.253310e-18 |
58 | 1.279652e-16 | 8.811838e-17 | 6.067938e-17 | 4.178456e-17 | 2.877335e-17 | 1.981368e-17 | 1.364394e-17 | 9.395381e-18 | 6.469773e-18 | 4.455164e-18 | 3.067880e-18 | 2.112579e-18 | 1.454748e-18 | 1.001757e-18 | 6.898218e-19 |
59 | 7.043204e-17 | 4.850036e-17 | 3.339793e-17 | 2.299822e-17 | 1.583685e-17 | 1.090545e-17 | 7.509624e-18 | 5.171218e-18 | 3.560963e-18 | 2.452122e-18 | 1.688561e-18 | 1.162764e-18 | 8.006931e-19 | 5.513669e-19 | 3.796779e-19 |
60 | 3.876579e-17 | 2.669460e-17 | 1.838222e-17 | 1.265822e-17 | 8.716604e-18 | 6.002359e-18 | 4.133297e-18 | 2.846238e-18 | 1.959954e-18 | 1.349648e-18 | 9.293840e-19 | 6.399851e-19 | 4.407015e-19 | 3.034724e-19 | 2.089747e-19 |
61 rows × 15 columns
# Grid search for the pair (bq, b) that minimises the total squared error
# (TSE) between gt and the normalised d_sdcg values.
res = 0.01
grid = dict()
min_err = sys.float_info.max
best_bq = best_b = -1


def tse_sdcg(bq, b):
    """Return the total squared error of the normalised d_sdcg values against gt.

    d_sdcg(bq, b, m, n) is evaluated for every m in range(M) and n in range(N),
    divided by the sum of all those values, and compared with gt[m, n].
    """
    # First pass: normalisation constant.
    norm = 0.0
    for m in range(M):
        for n in range(N):
            norm += d_sdcg(bq, b, m, n)
    # Second pass: accumulate the squared deviations.
    err = 0.0
    for m in range(M):
        for n in range(N):
            err += (gt[m, n] - d_sdcg(bq, b, m, n)/norm)**2
    return err


t = tqdm_notebook(np.arange(1.01, 5+res, res), desc="TSE: {:.4f}".format(min_err))
for bq_i, bq in enumerate(t):
    # One list of errors per bq value; it becomes a DataFrame column below.
    errors_for_bq = []
    grid[bq_i] = errors_for_bq
    for b in np.arange(1.01, 20+res, res):
        err = tse_sdcg(bq, b)
        errors_for_bq.append(err)
        if err < min_err:
            min_err, best_bq, best_b = err, bq, b
            t.set_description("TSE: {:.4f}".format(min_err))
grid = pd.DataFrame.from_dict(grid)
sdcg_best_bq, sdcg_best_b = best_bq, best_b
print("bq =", best_bq, ", b =", best_b, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
bq = 1.07 , b = 4.540000000000003 , TSE = 0.0362
# Evaluate d_sdcg at the best (bq, b) for every (m, n) with m in range(M) and
# n in range(N), then divide by the grand total so the entries sum to one.
dist_sdcg = []
norm = 0.0
for m in range(M):
    row = []
    for n in range(N):
        weight = d_sdcg(sdcg_best_bq, sdcg_best_b, m, n)
        row.append(weight)
        norm += weight
    dist_sdcg.append(row)
dist_sdcg = pd.DataFrame(dist_sdcg) / norm
# Displayed transposed: one column per m, one row per n.
dist_sdcg.transpose()
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.044418 | 0.003950 | 0.002577 | 0.002067 | 0.001792 | 0.001616 | 0.001493 | 0.001400 | 0.001327 | 0.001268 | 0.001219 | 0.001177 | 0.001142 | 0.001110 | 0.001083 |
1 | 0.028025 | 0.002492 | 0.001626 | 0.001304 | 0.001131 | 0.001020 | 0.000942 | 0.000883 | 0.000837 | 0.000800 | 0.000769 | 0.000743 | 0.000720 | 0.000701 | 0.000683 |
2 | 0.022209 | 0.001975 | 0.001288 | 0.001033 | 0.000896 | 0.000808 | 0.000746 | 0.000700 | 0.000663 | 0.000634 | 0.000609 | 0.000589 | 0.000571 | 0.000555 | 0.000541 |
3 | 0.019130 | 0.001701 | 0.001110 | 0.000890 | 0.000772 | 0.000696 | 0.000643 | 0.000603 | 0.000571 | 0.000546 | 0.000525 | 0.000507 | 0.000492 | 0.000478 | 0.000466 |
4 | 0.017183 | 0.001528 | 0.000997 | 0.000800 | 0.000693 | 0.000625 | 0.000577 | 0.000541 | 0.000513 | 0.000490 | 0.000472 | 0.000455 | 0.000442 | 0.000430 | 0.000419 |
5 | 0.015822 | 0.001407 | 0.000918 | 0.000736 | 0.000638 | 0.000576 | 0.000532 | 0.000499 | 0.000473 | 0.000452 | 0.000434 | 0.000419 | 0.000407 | 0.000395 | 0.000386 |
6 | 0.014806 | 0.001317 | 0.000859 | 0.000689 | 0.000597 | 0.000539 | 0.000498 | 0.000467 | 0.000442 | 0.000423 | 0.000406 | 0.000392 | 0.000381 | 0.000370 | 0.000361 |
7 | 0.014012 | 0.001246 | 0.000813 | 0.000652 | 0.000565 | 0.000510 | 0.000471 | 0.000442 | 0.000419 | 0.000400 | 0.000385 | 0.000371 | 0.000360 | 0.000350 | 0.000342 |
8 | 0.013371 | 0.001189 | 0.000776 | 0.000622 | 0.000539 | 0.000487 | 0.000449 | 0.000421 | 0.000399 | 0.000382 | 0.000367 | 0.000354 | 0.000344 | 0.000334 | 0.000326 |
9 | 0.012840 | 0.001142 | 0.000745 | 0.000597 | 0.000518 | 0.000467 | 0.000431 | 0.000405 | 0.000384 | 0.000367 | 0.000352 | 0.000340 | 0.000330 | 0.000321 | 0.000313 |
10 | 0.012390 | 0.001102 | 0.000719 | 0.000577 | 0.000500 | 0.000451 | 0.000416 | 0.000390 | 0.000370 | 0.000354 | 0.000340 | 0.000328 | 0.000318 | 0.000310 | 0.000302 |
11 | 0.012003 | 0.001067 | 0.000696 | 0.000559 | 0.000484 | 0.000437 | 0.000403 | 0.000378 | 0.000359 | 0.000343 | 0.000329 | 0.000318 | 0.000308 | 0.000300 | 0.000293 |
12 | 0.011666 | 0.001037 | 0.000677 | 0.000543 | 0.000471 | 0.000425 | 0.000392 | 0.000368 | 0.000349 | 0.000333 | 0.000320 | 0.000309 | 0.000300 | 0.000292 | 0.000284 |
13 | 0.011369 | 0.001011 | 0.000660 | 0.000529 | 0.000459 | 0.000414 | 0.000382 | 0.000358 | 0.000340 | 0.000325 | 0.000312 | 0.000301 | 0.000292 | 0.000284 | 0.000277 |
14 | 0.011104 | 0.000988 | 0.000644 | 0.000517 | 0.000448 | 0.000404 | 0.000373 | 0.000350 | 0.000332 | 0.000317 | 0.000305 | 0.000294 | 0.000285 | 0.000278 | 0.000271 |
15 | 0.010867 | 0.000966 | 0.000630 | 0.000506 | 0.000438 | 0.000395 | 0.000365 | 0.000342 | 0.000325 | 0.000310 | 0.000298 | 0.000288 | 0.000279 | 0.000272 | 0.000265 |
16 | 0.010652 | 0.000947 | 0.000618 | 0.000496 | 0.000430 | 0.000388 | 0.000358 | 0.000336 | 0.000318 | 0.000304 | 0.000292 | 0.000282 | 0.000274 | 0.000266 | 0.000260 |
17 | 0.010456 | 0.000930 | 0.000607 | 0.000487 | 0.000422 | 0.000380 | 0.000351 | 0.000329 | 0.000312 | 0.000298 | 0.000287 | 0.000277 | 0.000269 | 0.000261 | 0.000255 |
18 | 0.010277 | 0.000914 | 0.000596 | 0.000478 | 0.000415 | 0.000374 | 0.000345 | 0.000324 | 0.000307 | 0.000293 | 0.000282 | 0.000272 | 0.000264 | 0.000257 | 0.000251 |
19 | 0.010113 | 0.000899 | 0.000587 | 0.000471 | 0.000408 | 0.000368 | 0.000340 | 0.000319 | 0.000302 | 0.000289 | 0.000278 | 0.000268 | 0.000260 | 0.000253 | 0.000246 |
20 | 0.009960 | 0.000886 | 0.000578 | 0.000464 | 0.000402 | 0.000362 | 0.000335 | 0.000314 | 0.000298 | 0.000284 | 0.000273 | 0.000264 | 0.000256 | 0.000249 | 0.000243 |
21 | 0.009819 | 0.000873 | 0.000570 | 0.000457 | 0.000396 | 0.000357 | 0.000330 | 0.000309 | 0.000293 | 0.000280 | 0.000269 | 0.000260 | 0.000252 | 0.000245 | 0.000239 |
22 | 0.009688 | 0.000862 | 0.000562 | 0.000451 | 0.000391 | 0.000353 | 0.000326 | 0.000305 | 0.000289 | 0.000277 | 0.000266 | 0.000257 | 0.000249 | 0.000242 | 0.000236 |
23 | 0.009565 | 0.000851 | 0.000555 | 0.000445 | 0.000386 | 0.000348 | 0.000321 | 0.000301 | 0.000286 | 0.000273 | 0.000262 | 0.000254 | 0.000246 | 0.000239 | 0.000233 |
24 | 0.009450 | 0.000840 | 0.000548 | 0.000440 | 0.000381 | 0.000344 | 0.000318 | 0.000298 | 0.000282 | 0.000270 | 0.000259 | 0.000250 | 0.000243 | 0.000236 | 0.000230 |
25 | 0.009342 | 0.000831 | 0.000542 | 0.000435 | 0.000377 | 0.000340 | 0.000314 | 0.000294 | 0.000279 | 0.000267 | 0.000256 | 0.000248 | 0.000240 | 0.000234 | 0.000228 |
26 | 0.009240 | 0.000822 | 0.000536 | 0.000430 | 0.000373 | 0.000336 | 0.000310 | 0.000291 | 0.000276 | 0.000264 | 0.000254 | 0.000245 | 0.000237 | 0.000231 | 0.000225 |
27 | 0.009143 | 0.000813 | 0.000530 | 0.000425 | 0.000369 | 0.000333 | 0.000307 | 0.000288 | 0.000273 | 0.000261 | 0.000251 | 0.000242 | 0.000235 | 0.000229 | 0.000223 |
28 | 0.009052 | 0.000805 | 0.000525 | 0.000421 | 0.000365 | 0.000329 | 0.000304 | 0.000285 | 0.000270 | 0.000258 | 0.000248 | 0.000240 | 0.000233 | 0.000226 | 0.000221 |
29 | 0.008966 | 0.000797 | 0.000520 | 0.000417 | 0.000362 | 0.000326 | 0.000301 | 0.000283 | 0.000268 | 0.000256 | 0.000246 | 0.000238 | 0.000230 | 0.000224 | 0.000219 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
31 | 0.008805 | 0.000783 | 0.000511 | 0.000410 | 0.000355 | 0.000320 | 0.000296 | 0.000277 | 0.000263 | 0.000251 | 0.000242 | 0.000233 | 0.000226 | 0.000220 | 0.000215 |
32 | 0.008731 | 0.000776 | 0.000507 | 0.000406 | 0.000352 | 0.000318 | 0.000293 | 0.000275 | 0.000261 | 0.000249 | 0.000240 | 0.000231 | 0.000224 | 0.000218 | 0.000213 |
33 | 0.008660 | 0.000770 | 0.000502 | 0.000403 | 0.000349 | 0.000315 | 0.000291 | 0.000273 | 0.000259 | 0.000247 | 0.000238 | 0.000230 | 0.000223 | 0.000216 | 0.000211 |
34 | 0.008592 | 0.000764 | 0.000498 | 0.000400 | 0.000347 | 0.000313 | 0.000289 | 0.000271 | 0.000257 | 0.000245 | 0.000236 | 0.000228 | 0.000221 | 0.000215 | 0.000209 |
35 | 0.008526 | 0.000758 | 0.000495 | 0.000397 | 0.000344 | 0.000310 | 0.000286 | 0.000269 | 0.000255 | 0.000243 | 0.000234 | 0.000226 | 0.000219 | 0.000213 | 0.000208 |
36 | 0.008464 | 0.000753 | 0.000491 | 0.000394 | 0.000341 | 0.000308 | 0.000284 | 0.000267 | 0.000253 | 0.000242 | 0.000232 | 0.000224 | 0.000218 | 0.000212 | 0.000206 |
37 | 0.008404 | 0.000747 | 0.000488 | 0.000391 | 0.000339 | 0.000306 | 0.000282 | 0.000265 | 0.000251 | 0.000240 | 0.000231 | 0.000223 | 0.000216 | 0.000210 | 0.000205 |
38 | 0.008346 | 0.000742 | 0.000484 | 0.000388 | 0.000337 | 0.000304 | 0.000280 | 0.000263 | 0.000249 | 0.000238 | 0.000229 | 0.000221 | 0.000214 | 0.000209 | 0.000203 |
39 | 0.008291 | 0.000737 | 0.000481 | 0.000386 | 0.000334 | 0.000302 | 0.000279 | 0.000261 | 0.000248 | 0.000237 | 0.000228 | 0.000220 | 0.000213 | 0.000207 | 0.000202 |
40 | 0.008237 | 0.000733 | 0.000478 | 0.000383 | 0.000332 | 0.000300 | 0.000277 | 0.000260 | 0.000246 | 0.000235 | 0.000226 | 0.000218 | 0.000212 | 0.000206 | 0.000201 |
41 | 0.008186 | 0.000728 | 0.000475 | 0.000381 | 0.000330 | 0.000298 | 0.000275 | 0.000258 | 0.000245 | 0.000234 | 0.000225 | 0.000217 | 0.000210 | 0.000205 | 0.000200 |
42 | 0.008136 | 0.000724 | 0.000472 | 0.000379 | 0.000328 | 0.000296 | 0.000273 | 0.000256 | 0.000243 | 0.000232 | 0.000223 | 0.000216 | 0.000209 | 0.000203 | 0.000198 |
43 | 0.008088 | 0.000719 | 0.000469 | 0.000376 | 0.000326 | 0.000294 | 0.000272 | 0.000255 | 0.000242 | 0.000231 | 0.000222 | 0.000214 | 0.000208 | 0.000202 | 0.000197 |
44 | 0.008042 | 0.000715 | 0.000467 | 0.000374 | 0.000324 | 0.000293 | 0.000270 | 0.000253 | 0.000240 | 0.000230 | 0.000221 | 0.000213 | 0.000207 | 0.000201 | 0.000196 |
45 | 0.007997 | 0.000711 | 0.000464 | 0.000372 | 0.000323 | 0.000291 | 0.000269 | 0.000252 | 0.000239 | 0.000228 | 0.000219 | 0.000212 | 0.000206 | 0.000200 | 0.000195 |
46 | 0.007953 | 0.000707 | 0.000461 | 0.000370 | 0.000321 | 0.000289 | 0.000267 | 0.000251 | 0.000238 | 0.000227 | 0.000218 | 0.000211 | 0.000204 | 0.000199 | 0.000194 |
47 | 0.007911 | 0.000704 | 0.000459 | 0.000368 | 0.000319 | 0.000288 | 0.000266 | 0.000249 | 0.000236 | 0.000226 | 0.000217 | 0.000210 | 0.000203 | 0.000198 | 0.000193 |
48 | 0.007870 | 0.000700 | 0.000457 | 0.000366 | 0.000318 | 0.000286 | 0.000264 | 0.000248 | 0.000235 | 0.000225 | 0.000216 | 0.000209 | 0.000202 | 0.000197 | 0.000192 |
49 | 0.007830 | 0.000696 | 0.000454 | 0.000364 | 0.000316 | 0.000285 | 0.000263 | 0.000247 | 0.000234 | 0.000224 | 0.000215 | 0.000208 | 0.000201 | 0.000196 | 0.000191 |
50 | 0.007792 | 0.000693 | 0.000452 | 0.000363 | 0.000314 | 0.000284 | 0.000262 | 0.000246 | 0.000233 | 0.000222 | 0.000214 | 0.000207 | 0.000200 | 0.000195 | 0.000190 |
51 | 0.007755 | 0.000690 | 0.000450 | 0.000361 | 0.000313 | 0.000282 | 0.000261 | 0.000244 | 0.000232 | 0.000221 | 0.000213 | 0.000206 | 0.000199 | 0.000194 | 0.000189 |
52 | 0.007718 | 0.000686 | 0.000448 | 0.000359 | 0.000311 | 0.000281 | 0.000259 | 0.000243 | 0.000231 | 0.000220 | 0.000212 | 0.000205 | 0.000198 | 0.000193 | 0.000188 |
53 | 0.007683 | 0.000683 | 0.000446 | 0.000358 | 0.000310 | 0.000280 | 0.000258 | 0.000242 | 0.000230 | 0.000219 | 0.000211 | 0.000204 | 0.000197 | 0.000192 | 0.000187 |
54 | 0.007649 | 0.000680 | 0.000444 | 0.000356 | 0.000309 | 0.000278 | 0.000257 | 0.000241 | 0.000228 | 0.000218 | 0.000210 | 0.000203 | 0.000197 | 0.000191 | 0.000186 |
55 | 0.007615 | 0.000677 | 0.000442 | 0.000354 | 0.000307 | 0.000277 | 0.000256 | 0.000240 | 0.000227 | 0.000217 | 0.000209 | 0.000202 | 0.000196 | 0.000190 | 0.000186 |
56 | 0.007582 | 0.000674 | 0.000440 | 0.000353 | 0.000306 | 0.000276 | 0.000255 | 0.000239 | 0.000227 | 0.000216 | 0.000208 | 0.000201 | 0.000195 | 0.000190 | 0.000185 |
57 | 0.007551 | 0.000671 | 0.000438 | 0.000351 | 0.000305 | 0.000275 | 0.000254 | 0.000238 | 0.000226 | 0.000216 | 0.000207 | 0.000200 | 0.000194 | 0.000189 | 0.000184 |
58 | 0.007520 | 0.000669 | 0.000436 | 0.000350 | 0.000303 | 0.000274 | 0.000253 | 0.000237 | 0.000225 | 0.000215 | 0.000206 | 0.000199 | 0.000193 | 0.000188 | 0.000183 |
59 | 0.007489 | 0.000666 | 0.000434 | 0.000349 | 0.000302 | 0.000273 | 0.000252 | 0.000236 | 0.000224 | 0.000214 | 0.000206 | 0.000199 | 0.000192 | 0.000187 | 0.000183 |
60 | 0.007460 | 0.000663 | 0.000433 | 0.000347 | 0.000301 | 0.000271 | 0.000251 | 0.000235 | 0.000223 | 0.000213 | 0.000205 | 0.000198 | 0.000192 | 0.000186 | 0.000182 |
61 rows × 15 columns
# One-dimensional scan for the p that minimises the total squared error (TSE)
# between nt and the normalised geometric weights p**i.
res = 0.01
min_err = sys.float_info.max
best_p = -1


def tse_rbp(p):
    """Return the total squared error of the normalised weights p**i against nt.

    The weights p**0 .. p**(N-1) are divided by their sum and compared with
    nt[i][0] for each i in range(N).
    """
    weights = [p**rank for rank in range(N)]
    norm = 0.0
    for weight in weights:
        norm += weight
    err = 0.0
    for rank, weight in enumerate(weights):
        err += (nt[rank][0] - weight/norm)**2
    return err


t = tqdm_notebook(np.arange(0, 1 + res, res), desc="TSE: {:.4f}".format(min_err))
for p_i, p in enumerate(t):
    err = tse_rbp(p)
    if err < min_err:
        min_err, best_p = err, p
        t.set_description("TSE: {:.4f}".format(min_err))
rbp_best_p = best_p
print("p =", best_p, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
p = 0.59 , TSE = 0.0252
# Weights rbp_best_p**n for n in range(N), divided by their sum.
dist_rbp = []
norm = 0.0
for n in range(N):
    weight = rbp_best_p**n
    dist_rbp.append(weight)
    norm += weight
dist_rbp = pd.DataFrame(dist_rbp) / norm
dist_rbp
0 | |
---|---|
0 | 4.100000e-01 |
1 | 2.419000e-01 |
2 | 1.427210e-01 |
3 | 8.420539e-02 |
4 | 4.968118e-02 |
5 | 2.931190e-02 |
6 | 1.729402e-02 |
7 | 1.020347e-02 |
8 | 6.020048e-03 |
9 | 3.551828e-03 |
10 | 2.095579e-03 |
11 | 1.236391e-03 |
12 | 7.294709e-04 |
13 | 4.303879e-04 |
14 | 2.539288e-04 |
15 | 1.498180e-04 |
16 | 8.839263e-05 |
17 | 5.215165e-05 |
18 | 3.076947e-05 |
19 | 1.815399e-05 |
20 | 1.071085e-05 |
21 | 6.319404e-06 |
22 | 3.728448e-06 |
23 | 2.199784e-06 |
24 | 1.297873e-06 |
25 | 7.657450e-07 |
26 | 4.517895e-07 |
27 | 2.665558e-07 |
28 | 1.572679e-07 |
29 | 9.278808e-08 |
... | ... |
31 | 3.229953e-08 |
32 | 1.905672e-08 |
33 | 1.124347e-08 |
34 | 6.633645e-09 |
35 | 3.913851e-09 |
36 | 2.309172e-09 |
37 | 1.362411e-09 |
38 | 8.038228e-10 |
39 | 4.742554e-10 |
40 | 2.798107e-10 |
41 | 1.650883e-10 |
42 | 9.740211e-11 |
43 | 5.746724e-11 |
44 | 3.390567e-11 |
45 | 2.000435e-11 |
46 | 1.180256e-11 |
47 | 6.963513e-12 |
48 | 4.108473e-12 |
49 | 2.423999e-12 |
50 | 1.430159e-12 |
51 | 8.437940e-13 |
52 | 4.978385e-13 |
53 | 2.937247e-13 |
54 | 1.732976e-13 |
55 | 1.022456e-13 |
56 | 6.032489e-14 |
57 | 3.559168e-14 |
58 | 2.099909e-14 |
59 | 1.238946e-14 |
60 | 7.309784e-15 |
61 rows × 1 columns
# Grid search for the pair (b, p) that minimises the total squared error (TSE)
# between nt and the normalised d_srbp values taken at first index 0.
res = 0.01
grid = dict()
min_err = sys.float_info.max
best_p = best_b = -1


def tse_srbp(b, p):
    """Return the total squared error of the normalised d_srbp values against nt.

    d_srbp(b, p, 0, n) is evaluated for n in range(N), divided by the sum of
    those values, and compared with nt[n][0].
    """
    # First pass: normalisation constant.
    norm = 0.0
    for weight in (d_srbp(b, p, 0, n) for n in range(N)):
        norm += weight
    # Second pass: accumulate the squared deviations.
    err = 0.0
    for n in range(N):
        share = d_srbp(b, p, 0, n)/norm
        err += (nt[n][0] - share)**2
    return err


t = tqdm_notebook(np.arange(0, 1+res, res), desc="TSE: {:.4f}".format(min_err))
for p_i, p in enumerate(t):
    # One list of errors per p value; it becomes a DataFrame column below.
    errors_for_p = []
    grid[p_i] = errors_for_p
    for b in np.arange(0, 1+res, res):
        err = tse_srbp(b, p)
        errors_for_p.append(err)
        if err < min_err:
            min_err, best_p, best_b = err, p, b
            t.set_description("TSE: {:.4f}".format(min_err))
grid = pd.DataFrame.from_dict(grid)
srbp_best_p_2, srbp_best_b_2 = best_p, best_b
print("b =", best_b, ", p =", best_p, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
b = 0.92 , p = 0.64 , TSE = 0.0252
# d_srbp at the best (b, p) with first index 0, for n in range(N), divided by
# the sum of those values.
dist_srbp_2 = []
norm = 0.0
for n in range(N):
    weight = d_srbp(srbp_best_b_2, srbp_best_p_2, 0, n)
    dist_srbp_2.append(weight)
    norm += weight
dist_srbp_2 = pd.DataFrame(dist_srbp_2) / norm
dist_srbp_2
0 | |
---|---|
0 | 4.112000e-01 |
1 | 2.421146e-01 |
2 | 1.425571e-01 |
3 | 8.393759e-02 |
4 | 4.942245e-02 |
5 | 2.909994e-02 |
6 | 1.713405e-02 |
7 | 1.008853e-02 |
8 | 5.940124e-03 |
9 | 3.497545e-03 |
10 | 2.059355e-03 |
11 | 1.212548e-03 |
12 | 7.139482e-04 |
13 | 4.203727e-04 |
14 | 2.475155e-04 |
15 | 1.457371e-04 |
16 | 8.581000e-05 |
17 | 5.052493e-05 |
18 | 2.974908e-05 |
19 | 1.751626e-05 |
20 | 1.031357e-05 |
21 | 6.072632e-06 |
22 | 3.575565e-06 |
23 | 2.105293e-06 |
24 | 1.239596e-06 |
25 | 7.298744e-07 |
26 | 4.297501e-07 |
27 | 2.530368e-07 |
28 | 1.489881e-07 |
29 | 8.772419e-08 |
... | ... |
31 | 3.041270e-08 |
32 | 1.790700e-08 |
33 | 1.054364e-08 |
34 | 6.208095e-09 |
35 | 3.655326e-09 |
36 | 2.152256e-09 |
37 | 1.267248e-09 |
38 | 7.461559e-10 |
39 | 4.393366e-10 |
40 | 2.586814e-10 |
41 | 1.523116e-10 |
42 | 8.968107e-11 |
43 | 5.280421e-11 |
44 | 3.109112e-11 |
45 | 1.830645e-11 |
46 | 1.077884e-11 |
47 | 6.346580e-12 |
48 | 3.736866e-12 |
49 | 2.200267e-12 |
50 | 1.295517e-12 |
51 | 7.628005e-13 |
52 | 4.491369e-13 |
53 | 2.644518e-13 |
54 | 1.557092e-13 |
55 | 9.168160e-14 |
56 | 5.398213e-14 |
57 | 3.178468e-14 |
58 | 1.871482e-14 |
59 | 1.101928e-14 |
60 | 6.488155e-15 |
61 rows × 1 columns
# One-dimensional scan for the logarithm base b that minimises the total
# squared error (TSE) between nt and the normalised DCG discounts.
min_err = sys.float_info.max
best_b = -1
# Scan step for b.  The previous expression 1.0/res depended on the global res,
# which is still 0.01 when this cell runs, so the step was 100 and the scan
# evaluated b = 1.01 only.  The intended step is spelled out here instead, and
# the global res is deliberately left untouched.
b_step = 0.01


def tse_dcg(b):
    """Return the total squared error of the normalised DCG discounts against nt.

    The discounts 1/log_b(i + 2) for i in range(N) are divided by their sum and
    compared with nt[i][0].

    Note: 1/log_b(x) equals ln(b)/ln(x), so the factor ln(b) cancels in the
    normalisation.  The result is therefore mathematically independent of b,
    and any variation between bases is floating-point rounding only.
    """
    # Each logarithm is computed once and reused in both passes.
    logs = [math.log(i+2, b) for i in range(N)]
    norm = 0.0
    for log_value in logs:
        norm += 1.0/log_value
    err = 0.0
    for i, log_value in enumerate(logs):
        err += (nt[i][0] - 1.0/(log_value*norm))**2
    return err


t = tqdm_notebook(np.arange(1.01, 20.01, b_step), desc="TSE: {:.4f}".format(min_err))
for b_i, b in enumerate(t):
    err = tse_dcg(b)
    if err < min_err:
        min_err = err
        best_b = b
        t.set_description("TSE: {:.4f}".format(min_err))
dcg_best_b = best_b
print("b =", best_b, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
b = 1.01 , TSE = 0.1521
# Discounts 1/log(n + 2) in base dcg_best_b for n in range(N), divided by
# their sum.
dist_dcg = []
norm = 0.0
for n in range(N):
    weight = 1.0/math.log(n+2, dcg_best_b)
    dist_dcg.append(weight)
    norm += weight
dist_dcg = pd.DataFrame(dist_dcg) / norm
dist_dcg
0 | |
---|---|
0 | 0.067638 |
1 | 0.042675 |
2 | 0.033819 |
3 | 0.029130 |
4 | 0.026166 |
5 | 0.024093 |
6 | 0.022546 |
7 | 0.021337 |
8 | 0.020361 |
9 | 0.019552 |
10 | 0.018867 |
11 | 0.018278 |
12 | 0.017765 |
13 | 0.017312 |
14 | 0.016909 |
15 | 0.016548 |
16 | 0.016220 |
17 | 0.015923 |
18 | 0.015650 |
19 | 0.015399 |
20 | 0.015167 |
21 | 0.014952 |
22 | 0.014752 |
23 | 0.014565 |
24 | 0.014390 |
25 | 0.014225 |
26 | 0.014070 |
27 | 0.013923 |
28 | 0.013784 |
29 | 0.013653 |
... | ... |
31 | 0.013408 |
32 | 0.013295 |
33 | 0.013187 |
34 | 0.013083 |
35 | 0.012984 |
36 | 0.012888 |
37 | 0.012797 |
38 | 0.012709 |
39 | 0.012625 |
40 | 0.012543 |
41 | 0.012465 |
42 | 0.012389 |
43 | 0.012316 |
44 | 0.012245 |
45 | 0.012177 |
46 | 0.012111 |
47 | 0.012047 |
48 | 0.011984 |
49 | 0.011924 |
50 | 0.011865 |
51 | 0.011808 |
52 | 0.011753 |
53 | 0.011699 |
54 | 0.011647 |
55 | 0.011596 |
56 | 0.011546 |
57 | 0.011498 |
58 | 0.011451 |
59 | 0.011405 |
60 | 0.011360 |
61 rows × 1 columns
res = 100
min_err = sys.float_info.max
best_bq = -1
best_b = -1
grid = {}
def tse_sdcg(bq, b):
    """Return the total squared error of the normalised sDCG discount against `nt`.

    The discount d_sdcg(bq, b, 0, i) is evaluated for every rank i in
    range(N) with the third argument fixed to 0, divided by the sum of all
    discounts, and compared with nt[i][0]; the squared differences are summed.
    """
    # Evaluate each discount once and reuse it for the normaliser and the
    # error (assumes d_sdcg is deterministic for identical arguments).
    discounts = [d_sdcg(bq, b, 0, rank) for rank in range(N)]
    norm = 0.0
    for discount in discounts:
        norm += discount
    err = 0.0
    for rank, discount in enumerate(discounts):
        err += (nt[rank][0]-discount/norm)**2
    return err
# Exhaustive scan over the (bq, b) grid. Each column of `grid` holds the errors
# for one bq value across all b values; the pair with the strictly smallest
# error is remembered and shown in the progress-bar label.
t = tqdm_notebook(np.arange(1.01, 5.01, 1.0/res), desc = "TSE: {:.4f}".format(min_err)) # 5.01
b_candidates = np.arange(1.01, 20.01, 1.0/res) # 20.01
for bq_i, bq in enumerate(t):
    errors_for_bq = []
    for b in b_candidates:
        err = tse_sdcg(bq, b)
        errors_for_bq.append(err)
        if err < min_err:
            min_err, best_bq, best_b = err, bq, b
            t.set_description("TSE: {:.4f}".format(min_err))
    grid[bq_i] = errors_for_bq
grid = pd.DataFrame.from_dict(grid)
# Keep the winners under dedicated names: `best_bq`/`best_b` are scratch names.
sdcg_best_bq_2 = best_bq
sdcg_best_b_2 = best_b
print("bq =", best_bq, ", b = ", best_b, ", TSE = {:.4f}".format(min_err))
HBox(children=(IntProgress(value=0, description='TSE: 17976931348623157081452742373170435679807056752584499659…
bq = 1.01 , b = 1.2600000000000002 , TSE = 0.1521
# Normalised sDCG discount over the N ranks, with the third d_sdcg argument
# fixed to 0 and the parameters selected by the grid search above.
dist_sdcg_2 = [d_sdcg(sdcg_best_bq_2, sdcg_best_b_2, 0, rank) for rank in range(N)]
# Accumulate the normaliser in rank order.
norm = 0.0
for weight in dist_sdcg_2:
    norm += weight
dist_sdcg_2 = pd.DataFrame(dist_sdcg_2)/norm
dist_sdcg_2
0 | |
---|---|
0 | 0.067638 |
1 | 0.042675 |
2 | 0.033819 |
3 | 0.029130 |
4 | 0.026166 |
5 | 0.024093 |
6 | 0.022546 |
7 | 0.021337 |
8 | 0.020361 |
9 | 0.019552 |
10 | 0.018867 |
11 | 0.018278 |
12 | 0.017765 |
13 | 0.017312 |
14 | 0.016909 |
15 | 0.016548 |
16 | 0.016220 |
17 | 0.015923 |
18 | 0.015650 |
19 | 0.015399 |
20 | 0.015167 |
21 | 0.014952 |
22 | 0.014752 |
23 | 0.014565 |
24 | 0.014390 |
25 | 0.014225 |
26 | 0.014070 |
27 | 0.013923 |
28 | 0.013784 |
29 | 0.013653 |
... | ... |
31 | 0.013408 |
32 | 0.013295 |
33 | 0.013187 |
34 | 0.013083 |
35 | 0.012984 |
36 | 0.012888 |
37 | 0.012797 |
38 | 0.012709 |
39 | 0.012625 |
40 | 0.012543 |
41 | 0.012465 |
42 | 0.012389 |
43 | 0.012316 |
44 | 0.012245 |
45 | 0.012177 |
46 | 0.012111 |
47 | 0.012047 |
48 | 0.011984 |
49 | 0.011924 |
50 | 0.011865 |
51 | 0.011808 |
52 | 0.011753 |
53 | 0.011699 |
54 | 0.011647 |
55 | 0.011596 |
56 | 0.011546 |
57 | 0.011498 |
58 | 0.011451 |
59 | 0.011405 |
60 | 0.011360 |
61 rows × 1 columns
# Total absolute error of each fitted distribution against the observed one.
# Session models are compared cell by cell with the M x N matrix `gt`.
print("Sessions")
for row_fmt, fitted in (("\tsRBP \t{:.4f}", dist_srbp),
                        ("\tsDCG \t{:.4f}", dist_sdcg)):
    abs_err = 0.0
    for m in range(M):
        for n in range(N):
            abs_err += abs(gt[m, n] - fitted.iat[m, n])
    print(row_fmt.format(abs_err))

# Single-query models are compared rank by rank with the first column of `nt`.
print("Independent Queries")
for row_fmt, fitted in (("\tRBP \t{:.4f}", dist_rbp),
                        ("\tsRBP \t{:.4f}", dist_srbp_2),
                        ("\tDCG \t{:.4f}", dist_dcg),
                        ("\tsDCG \t{:.4f}", dist_sdcg_2)):
    fitted_values = fitted.values
    abs_err = 0.0
    for n in range(N):
        abs_err += abs(nt[n][0] - fitted_values[n][0])
    print(row_fmt.format(abs_err))
Sessions sRBP 0.4950 sDCG 1.3357 Independent Queries RBP 0.4242 sRBP 0.4238 DCG 1.2162 sDCG 1.2162
# Sum of p*log2(p/q) between the observed values p and each fitted
# distribution q; cells where the observed value is not positive are skipped.
print("Sessions")
for row_fmt, fitted in (("\tsRBP \t{:.4f}", dist_srbp),
                        ("\tsDCG \t{:.4f}", dist_sdcg)):
    kld = 0.0
    for m in range(M):
        for n in range(N):
            observed = gt[m, n]
            if observed > 0:
                kld += observed*math.log(observed/fitted.iat[m, n], 2)
    print(row_fmt.format(kld))

# Single-query models use the first column of `nt` as the observed values.
print("Independent Queries")
for row_fmt, fitted in (("\tRBP \t{:.4f}", dist_rbp),
                        ("\tsRBP \t{:.4f}", dist_srbp_2),
                        ("\tDCG \t{:.4f}", dist_dcg),
                        ("\tsDCG \t{:.4f}", dist_sdcg_2)):
    fitted_values = fitted.values
    kld = 0.0
    for n in range(N):
        observed = nt[n, 0]
        if observed > 0:
            kld += observed*math.log(observed/fitted_values[n][0], 2)
    print(row_fmt.format(kld))
Sessions sRBP 0.9475 sDCG 2.2710 Independent Queries RBP 0.6624 sRBP 0.6679 DCG 1.5035 sDCG 1.5035
We now compare how the measures behave when used to evaluate actual sessions and search results. Before doing this, we need to select only those sessions that have been judged (i.e., whose topics are contained in the qrels).
# Collect the sessions with ids 1 through 100 (the id range is hard-coded).
judged_sessions = [sessions[session_id] for session_id in range(1, 101)]
To use standard evaluation measures on sessions we can either (i) evaluate only the last reformulation, or (ii) aggregate evaluations of the query and all reformulations together. Of course, session-based evaluation measures do not have this issue.
def standard_i_measure(d, sessions):
    """Average, over sessions, the score of the last result list of each session.

    Each session is a sequence of event strings. An event that starts with
    'q' or equals 'r' opens a new result list and resets both the rank and the
    running score; an event ending in '-1' adds d(rank) and advances the rank;
    'f' is ignored; any other event only advances the rank.

    Args:
        d: callable mapping a 0-based rank to its discount.
        sessions: sized iterable of event sequences.

    Returns:
        The mean of the per-session scores.

    Raises:
        ZeroDivisionError: if `sessions` is empty.
    """
    total = 0.0
    for events in sessions:
        score, rank = 0.0, 0
        for ev in events:
            if ev == 'r' or ev.startswith('q'):
                # New result list: only the last one survives to the end.
                score, rank = 0.0, 0
                continue
            if ev == 'f':
                continue
            if ev.endswith('-1'):
                score += d(rank)
            rank += 1
        total += score
    return total / len(sessions)
def standard_ii_measure(d, sessions):
    """Average the score over every result list found in `sessions`.

    Each session is a sequence of event strings. An event that starts with
    'q' or equals 'r' opens a new result list (counted once) and resets the
    rank; an event ending in '-1' adds d(rank) and advances the rank; 'f' is
    ignored; any other event only advances the rank.

    Args:
        d: callable mapping a 0-based rank to its discount.
        sessions: iterable of event sequences.

    Returns:
        The accumulated score divided by the number of result lists seen
        across all sessions.

    Raises:
        ZeroDivisionError: if no 'q...' or 'r' event occurs at all.
    """
    total = 0.0
    list_count = 0
    for events in sessions:
        rank = 0
        for ev in events:
            if ev == 'r' or ev.startswith('q'):
                list_count += 1
                rank = 0
            elif ev != 'f':
                if ev.endswith('-1'):
                    total += d(rank)
                rank += 1
    return total / list_count
def session_based_measure(d, sessions):
    """Average, over sessions, a score discounted by reformulation and rank.

    Each session is a sequence of event strings. An event starting with 'q'
    resets both the reformulation index and the rank; 'r' moves to the next
    reformulation and resets the rank; an event ending in '-1' adds
    d(reformulation, rank) and advances the rank; 'f' is ignored; any other
    event only advances the rank.

    Args:
        d: callable mapping (reformulation index, 0-based rank) to a discount.
        sessions: sized iterable of event sequences.

    Returns:
        The accumulated score divided by the number of sessions.

    Raises:
        ZeroDivisionError: if `sessions` is empty.
    """
    total = 0.0
    for events in sessions:
        reformulation, rank = 0, 0
        for ev in events:
            if ev.startswith('q'):
                reformulation, rank = 0, 0
            elif ev == 'r':
                reformulation, rank = reformulation + 1, 0
            elif ev != 'f':
                if ev.endswith('-1'):
                    total += d(reformulation, rank)
                rank += 1
    return total / len(sessions)
def rbp_i(p, sessions):
    """Score `sessions` with standard_i_measure under the discount p**n, scaled by (1-p)."""
    def discount(n):
        return p**n
    return (1-p)*standard_i_measure(discount, sessions)
def dcg_i(b, sessions):
    """Score `sessions` with standard_i_measure under the discount 1/log_b(n+2)."""
    def discount(n):
        return 1.0/math.log(n+2, b)
    return standard_i_measure(discount, sessions)
def rbp_ii(p, sessions):
    """Score `sessions` with standard_ii_measure under the discount p**n, scaled by (1-p)."""
    def discount(n):
        return p**n
    return (1-p)*standard_ii_measure(discount, sessions)
def dcg_ii(b, sessions):
    """Score `sessions` with standard_ii_measure under the discount 1/log_b(n+2)."""
    def discount(n):
        return 1.0/math.log(n+2, b)
    return standard_ii_measure(discount, sessions)
def srbp(b, p, sessions):
    """Score `sessions` with session_based_measure under d_srbp(b, p, m, n), scaled by (1-p)."""
    def discount(m, n):
        return d_srbp(b, p, m, n)
    return (1-p)*session_based_measure(discount, sessions)
def sdcg(bq, b, sessions):
    """Score `sessions` with session_based_measure under the discount d_sdcg(bq, b, m, n)."""
    def discount(m, n):
        return d_sdcg(bq, b, m, n)
    return session_based_measure(discount, sessions)
# Score every judged session on its own (each is wrapped in a one-element
# list) with each measure, using the parameters fitted earlier.
srbp_vs = [srbp(srbp_best_b, srbp_best_p, [s]) for s in judged_sessions]
rbp_i_vs = [rbp_i(rbp_best_p, [s]) for s in judged_sessions]
rbp_ii_vs = [rbp_ii(rbp_best_p, [s]) for s in judged_sessions]
sdcg_vs = [sdcg(sdcg_best_bq, sdcg_best_b, [s]) for s in judged_sessions]
dcg_i_vs = [dcg_i(dcg_best_b, [s]) for s in judged_sessions]
dcg_ii_vs = [dcg_ii(dcg_best_b, [s]) for s in judged_sessions]

# Label each score list with the measure and its parameter values.
ls = [
    ("sRBP (b {:.2f}, p {:.2f})".format(srbp_best_b, srbp_best_p), srbp_vs),
    ("RBP i (p {:.2f})".format(rbp_best_p), rbp_i_vs),
    ("RBP ii (p {:.2f})".format(rbp_best_p), rbp_ii_vs),
    ("sDCG (bq {:.2f}, b {:.2f})".format(sdcg_best_bq, sdcg_best_b), sdcg_vs),
    ("DCG i (b {:.2f})".format(dcg_best_b), dcg_i_vs),
    ("DCG ii (b {:.2f})".format(dcg_best_b), dcg_ii_vs),
]

# Kendall tau for every pair of measures; the label comparison keeps each
# unordered pair exactly once.
for name1, vs1 in ls:
    for name2, vs2 in ls:
        if name1 <= name2:
            continue
        tau = stats.kendalltau(vs1, vs2).correlation
        print("{:<25} - {:<25} = {:.3f}".format(name1, name2, tau))
sRBP (b 0.64, p 0.86) - RBP i (p 0.59) = 0.555 sRBP (b 0.64, p 0.86) - RBP ii (p 0.59) = 0.801 sRBP (b 0.64, p 0.86) - sDCG (bq 1.07, b 4.54) = 0.772 sRBP (b 0.64, p 0.86) - DCG i (b 1.01) = 0.532 sRBP (b 0.64, p 0.86) - DCG ii (b 1.01) = 0.745 RBP i (p 0.59) - DCG i (b 1.01) = 0.906 RBP i (p 0.59) - DCG ii (b 1.01) = 0.666 RBP ii (p 0.59) - RBP i (p 0.59) = 0.675 RBP ii (p 0.59) - DCG i (b 1.01) = 0.659 RBP ii (p 0.59) - DCG ii (b 1.01) = 0.869 sDCG (bq 1.07, b 4.54) - RBP i (p 0.59) = 0.532 sDCG (bq 1.07, b 4.54) - RBP ii (p 0.59) = 0.747 sDCG (bq 1.07, b 4.54) - DCG i (b 1.01) = 0.560 sDCG (bq 1.07, b 4.54) - DCG ii (b 1.01) = 0.779 DCG ii (b 1.01) - DCG i (b 1.01) = 0.702
def plot_scatter(name_x, x, name_y, y):
    """Display an inline plotly scatter plot of `y` against `x`.

    Args:
        name_x: title of the horizontal axis.
        x: values on the horizontal axis.
        name_y: title of the vertical axis.
        y: values on the vertical axis.
    """
    x_axis = go.layout.XAxis(title=go.layout.xaxis.Title(text=name_x))
    y_axis = go.layout.YAxis(title=go.layout.yaxis.Title(text=name_y))
    points = go.Scatter(x=x, y=y, mode='markers')
    figure = go.Figure(data=[points], layout=go.Layout(xaxis=x_axis, yaxis=y_axis))
    iplot(figure)
# One scatter plot per pair of measures; the label comparison keeps each
# unordered pair exactly once.
for x_name, x_values in ls:
    for y_name, y_values in ls:
        if x_name <= y_name:
            continue
        plot_scatter(x_name, x_values, y_name, y_values)
The search results provide only a ranked list of documents for the last reformulation in each session. To create sessions out of these runs, we prepend to each run its session.
def join_sessions_to_runs(sessions, runs):
    """Prefix every run's per-session list with the events of that session.

    For each run, and for each session key that also appears in that run, the
    session's event list is copied, its last element is replaced by 'r', and
    the run's list for that session is appended. The inputs are not modified.

    Args:
        sessions: mapping from session key to its list of events.
        runs: mapping from run key to a mapping of session key to a list.

    Returns:
        A dict {run key: {session key: joined list}} holding only the
        sessions present in the corresponding run.
    """
    joined = {}
    for run_key in runs:
        run_lists = runs[run_key]
        per_session = {}
        for session_key in sessions:
            if session_key not in run_lists:
                continue
            prefix = sessions[session_key].copy()
            # Overwrite the session's final event with 'r' before appending.
            prefix[-1] = 'r'
            per_session[session_key] = prefix + run_lists[session_key]
        joined[run_key] = per_session
    return joined
# Prepend each session to the corresponding ranked list of every run.
complete_runs = join_sessions_to_runs(sessions, runs)

srbp_vs = []
rbp_i_vs = []
rbp_ii_vs = []
sdcg_vs = []
dcg_i_vs = []
dcg_ii_vs = []
# Score every run over all of its sessions with each measure. The loop uses
# its own names: reusing `runs` here would rebind the global that is passed to
# join_sessions_to_runs above, so the cell could not be executed a second time.
for run_name, run_by_session in complete_runs.items():
    run_sessions = list(run_by_session.values())
    srbp_vs.append(
        srbp(srbp_best_b, srbp_best_p, run_sessions))
    rbp_i_vs.append(
        rbp_i(rbp_best_p, run_sessions))
    rbp_ii_vs.append(
        rbp_ii(rbp_best_p, run_sessions))
    sdcg_vs.append(
        sdcg(sdcg_best_bq, sdcg_best_b, run_sessions))
    dcg_i_vs.append(
        dcg_i(dcg_best_b, run_sessions))
    dcg_ii_vs.append(
        dcg_ii(dcg_best_b, run_sessions))

# Label each score list with the measure and its parameter values.
ls = [("sRBP (b {:.2f}, p {:.2f})".format(srbp_best_b, srbp_best_p), srbp_vs),
      ("RBP i (p {:.2f})".format(rbp_best_p), rbp_i_vs),
      ("RBP ii (p {:.2f})".format(rbp_best_p), rbp_ii_vs),
      ("sDCG (bq {:.2f}, b {:.2f})".format(sdcg_best_bq, sdcg_best_b), sdcg_vs),
      ("DCG i (b {:.2f})".format(dcg_best_b), dcg_i_vs),
      ("DCG ii (b {:.2f})".format(dcg_best_b), dcg_ii_vs)]

# Kendall tau for every pair of measures; the label comparison keeps each
# unordered pair exactly once.
for name1, vs1 in ls:
    for name2, vs2 in ls:
        if name1 > name2:
            print("{:<25} - {:<25} = {:.3f}".format(name1, name2, stats.kendalltau(vs1, vs2).correlation))
sRBP (b 0.64, p 0.86) - RBP i (p 0.59) = 0.843 sRBP (b 0.64, p 0.86) - RBP ii (p 0.59) = 0.843 sRBP (b 0.64, p 0.86) - sDCG (bq 1.07, b 4.54) = 0.290 sRBP (b 0.64, p 0.86) - DCG i (b 1.01) = 0.315 sRBP (b 0.64, p 0.86) - DCG ii (b 1.01) = 0.315 RBP i (p 0.59) - DCG i (b 1.01) = 0.293 RBP i (p 0.59) - DCG ii (b 1.01) = 0.293 RBP ii (p 0.59) - RBP i (p 0.59) = 1.000 RBP ii (p 0.59) - DCG i (b 1.01) = 0.293 RBP ii (p 0.59) - DCG ii (b 1.01) = 0.293 sDCG (bq 1.07, b 4.54) - RBP i (p 0.59) = 0.270 sDCG (bq 1.07, b 4.54) - RBP ii (p 0.59) = 0.270 sDCG (bq 1.07, b 4.54) - DCG i (b 1.01) = 0.950 sDCG (bq 1.07, b 4.54) - DCG ii (b 1.01) = 0.950 DCG ii (b 1.01) - DCG i (b 1.01) = 1.000
# One scatter plot per pair of measures; the label comparison keeps each
# unordered pair exactly once.
for x_name, x_values in ls:
    for y_name, y_values in ls:
        if x_name <= y_name:
            continue
        plot_scatter(x_name, x_values, y_name, y_values)
[1] Aldo Lipani, Ben Carterette, Emine Yilmaz. From a User Model for Query Sessions to Session Rank Biased Precision (sRBP). In Proc. of ICTIR '19.