This notebook compares the RV precisions calculated by eniric in 2017, after fixing a masking bug in condition #2 and changing the normalization implementation, to the published results in Figueira et al. (2016).
from os.path import join
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from eniric import config
# Load the published (Figueira et al. 2016) precision table from the
# configured results directory. The file is tab-separated.
path = config.paths["precision_results"]
old_precision_file = join(path, "precision_figueira_2016.dat")
df_pub = pd.read_csv(old_precision_file, sep="\t")
# Notebook display: column labels, then the first rows of the table.
df_pub.columns
# df_old_corrected = ...
df_pub.head()
Simulation | RV_Cond_1[m/s] | RV_Cond_2[m/s] | RV_Cond_3[m/s] | |
---|---|---|---|---|
0 | M0-Z-1.0-60k | 8.9 | 26.1 | 9.3 |
1 | M0-Z-1.0-80k | 6.0 | 17.1 | 6.2 |
2 | M0-Z-1.0-100k | 4.5 | 12.8 | 4.6 |
3 | M0-Z-5.0-60k | 13.6 | 38.9 | 14.0 |
4 | M0-Z-5.0-80k | 10.6 | 30.5 | 10.9 |
# Load the 2017 precision table (tab-separated) and relabel its "# id"
# column as "Simulation" so it matches the published table.
new_snrnorm_file = join(path, "precision_results_2017.dat")
df_new = pd.read_csv(new_snrnorm_file, sep="\t").rename(
    columns={"# id": "Simulation"}
)
# Round each precision column to 1 decimal place.
for prec_col in ("prec_1", "prec_2", "prec_3"):
    df_new[prec_col] = np.round(df_new[prec_col], 1)
df_new.head()
Simulation | prec_1 | prec_2 | prec_3 | |
---|---|---|---|---|
0 | M0-Z-1.0-60k | 9.0 | 14.8 | 9.3 |
1 | M0-Z-1.0-80k | 6.0 | 9.9 | 6.2 |
2 | M0-Z-1.0-100k | 4.5 | 7.5 | 4.7 |
3 | M0-Z-5.0-60k | 13.7 | 22.4 | 14.2 |
4 | M0-Z-5.0-80k | 10.7 | 17.4 | 11.0 |
# The comparisons below are row-wise, so both tables must list the same
# simulations in the same order.
assert (df_pub["Simulation"] == df_new["Simulation"]).all()
# Show the column labels of the published table, then of the 2017 table.
for frame in (df_pub, df_new):
    print(frame.columns)
Index(['Simulation', 'RV_Cond_1[m/s]', 'RV_Cond_2[m/s]', 'RV_Cond_3[m/s]'], dtype='object') Index(['Simulation', 'prec_1', 'prec_2', 'prec_3'], dtype='object')
# Compare the 2017 precisions (clumping fix and changed normalization) in
# df_new against the published values in df_pub, for conditions 1, 2 and 3.
# Conditions 1 and 3 test the normalization change.

# Count, per condition, the simulations whose value went up or down
# relative to the published value. Equal values are counted in neither.
cond_1_up = (df_new["prec_1"] > df_pub["RV_Cond_1[m/s]"]).sum()
cond_1_down = (df_new["prec_1"] < df_pub["RV_Cond_1[m/s]"]).sum()
cond_2_up = (df_new["prec_2"] > df_pub["RV_Cond_2[m/s]"]).sum()
cond_2_down = (df_new["prec_2"] < df_pub["RV_Cond_2[m/s]"]).sum()
cond_3_up = (df_new["prec_3"] > df_pub["RV_Cond_3[m/s]"]).sum()
cond_3_down = (df_new["prec_3"] < df_pub["RV_Cond_3[m/s]"]).sum()

# Report the counts with the condition number in each message, so the
# output lines can be told apart. A smaller value is reported as an
# improvement in precision.
for cond, n_down, n_up in (
    (1, cond_1_down, cond_1_up),
    (2, cond_2_down, cond_2_up),
    (3, cond_3_down, cond_3_up),
):
    print(
        "Condition {0}: Number of Simulations that improve precision "
        "from all fixes = {1}".format(cond, n_down)
    )
    print(
        "Condition {0}: Number of Simulations that worsen precision "
        "from all fixes = {1}".format(cond, n_up)
    )

# Percentage of change
# 100 * (new - old) / old
all_cond1_percent_diff = (
    100 * (df_new["prec_1"] - df_pub["RV_Cond_1[m/s]"]) / df_pub["RV_Cond_1[m/s]"]
)
all_cond2_percent_diff = (
    100 * (df_new["prec_2"] - df_pub["RV_Cond_2[m/s]"]) / df_pub["RV_Cond_2[m/s]"]
)
all_cond3_percent_diff = (
    100 * (df_new["prec_3"] - df_pub["RV_Cond_3[m/s]"]) / df_pub["RV_Cond_3[m/s]"]
)

# One histogram of the percentage change per condition, each shown as its
# own figure.
for cond, percent_diff in (
    (1, all_cond1_percent_diff),
    (2, all_cond2_percent_diff),
    (3, all_cond3_percent_diff),
):
    percent_diff.hist(bins=20, label="cond{}".format(cond))
    plt.xlabel("Percentage of RV Change.")
    plt.legend()
    plt.title("RV precision change due to all fixes.")
    plt.show()
print("The majority of the changes comes from the bug in condition_2.")
Number of Simulations that improve precision from all fixes = 0 Number of Simulations that worsen precision from all fixes = 110 Number of Simulations that improve precision from all fixes = 69 Number of Simulations that worsen precision from all fixes = 111 Number of Simulations that improve precision from all fixes = 0 Number of Simulations that worsen precision from all fixes = 104
# Find the simulations with an extreme change in condition 2 precision:
# those whose condition 2 value increased by more than `percentage_lim`
# percent relative to the published value.
percentage_lim = 200
mask = all_cond2_percent_diff > percentage_lim
# Published and 2017 values of all three conditions for the selected rows.
dict_for_df = {
    "Simulation": df_new.Simulation[mask],
    "pub_cond_1": df_pub["RV_Cond_1[m/s]"][mask],
    "new_cond_1": df_new.prec_1[mask],
    "pub_cond_2": df_pub["RV_Cond_2[m/s]"][mask],
    "new_cond_2": df_new.prec_2[mask],
    "pub_cond_3": df_pub["RV_Cond_3[m/s]"][mask],
    "new_cond_3": df_new.prec_3[mask],
}
# Explicit column order: published/new pairs side by side per condition.
new_cols = [
    "Simulation",
    "pub_cond_1",
    "new_cond_1",
    "pub_cond_2",
    "new_cond_2",
    "pub_cond_3",
    "new_cond_3",
]
# Passing columns= selects the dict entries in this order, so no separate
# reordering step is needed.
df_large = pd.DataFrame(dict_for_df, columns=new_cols)
print("Simulations that have a large change in precision for condition 2.")
df_large.head()
# Most large changes in precision from the K band.
Including the published condition 2 precision values.
# Plot styling: line colour encodes the resolution, marker the condition.
res_colour = {"60k": "blue", "80k": "green", "100k": "red"}
cond_marker = {1: ".", 2: "o", 3: "^"}
# x-axis position of each band.
band_loc = {"Z": 1, "Y": 2, "J": 3, "H": 4, "K": 5}
# Column (as a one-element list) holding each condition in df_new / df_pub.
conditions = {1: ["prec_1"], 2: ["prec_2"], 3: ["prec_3"]}
pub_conds = {1: ["RV_Cond_1[m/s]"], 2: ["RV_Cond_2[m/s]"], 3: ["RV_Cond_3[m/s]"]}
vel = 1.0
bands = "ZYJHK"
# The x positions are the same for every plotted line, so build them once.
x_vals = [band_loc[b] for b in bands]
# NOTE(review): the loop below draws dashed lines for the published values
# of all three conditions, not only cond_2 -- confirm which is intended.
print("Dashed lines indicate the published cond_2 values.")
# One figure per star; each holds a published (dashed) and a 2017 (solid)
# line for every resolution/condition pair, plotted against band.
for star in ["M0", "M3", "M6", "M9"]:
    for res in ["60k", "80k", "100k"]:
        for cond in range(1, 4):
            # Simulation ids of this star/resolution in each band.
            ids = [
                "{0:s}-{1:s}-{2:.1f}-{3}".format(star, b, vel, res) for b in bands
            ]
            # Row masks are built from df_new and also applied to df_pub.
            df_masks = [df_new.Simulation == sim_id for sim_id in ids]
            # .iat[0, 0] takes the first element of the masked one-column frame.
            pub_rv_vals = [
                df_pub[pub_conds[cond]][mask].iat[0, 0] for mask in df_masks
            ]
            plt.plot(
                x_vals,
                pub_rv_vals,
                ls="--",
                marker=cond_marker[cond],
                color=res_colour[res],
            )
            rv_vals = [df_new[conditions[cond]][mask].iat[0, 0] for mask in df_masks]
            plt.plot(x_vals, rv_vals, marker=cond_marker[cond], color=res_colour[res])
    plt.xticks(x_vals, list(bands))
    plt.ylabel("Precision [m/s]")
    plt.title("{0} with R_vel = {1} m/s".format(star, vel))
    plt.show()