This analysis builds off BuzzFeed News' prior work analyzing figure skating scores, which you can find here. The code below calculates the percentage of times judges from any given country have scored each ice dance team above or below the average of the other judges for that performance at 17 high-level competitions between Oct. 2016 and Dec. 2017.
import pandas as pd
Note: This section is a reproduction of the setup steps in the home-country-preference
notebook. Please see that notebook for more details about the process.
# Load the judge records and the judge-country table, then combine them
# with pandas' default (inner) join on the shared "clean_judge_name" column.
judge_nat = pd.read_csv("../data/processed/judge-country.csv")
all_judges = pd.read_csv("../data/processed/judges.csv")
judges = all_judges.merge(judge_nat, on="clean_judge_name")
def clean_judge_number(role):
    """Return "J" followed by the last non-whitespace character of *role*.

    Leading and trailing whitespace is stripped before the final character
    is taken, so a role string ending in "1" becomes "J1".
    """
    trimmed = role.strip()
    final_char = trimmed[-1]
    return "J" + final_char
# Derive the short "J"-prefixed label for every judge role; it is the key
# later matched against the "judge" column of the scoring data.
judges["clean_role"] = judges["role"].map(clean_judge_number)

# Read the three raw scoring tables.
performances = pd.read_csv("../data/raw/performances.csv")
aspects = pd.read_csv("../data/raw/judged-aspects.csv")
scores = pd.read_csv("../data/raw/judge-scores.csv")

# Report the row count of each table, with thousands separators.
print("{:,} performances".format(len(performances)))
print("{:,} aspects".format(len(aspects)))
print("{:,} scores".format(len(scores)))
1,726 performances 23,932 aspects 214,531 scores
judge_goe = pd.read_csv("../data/processed/judge-goe.csv")

# Enrich every individual judge score with its aspect, its performance and
# the per-judge GOE row. All three joins are left joins, so no row of
# `scores` is dropped. Two helper columns are then derived from the
# program title.
scores_with_context = (
    scores
    .merge(aspects, on="aspect_id", how="left")
    .merge(performances, on="performance_id", how="left")
    .merge(judge_goe, on=["aspect_id", "judge"], how="left")
    .assign(
        is_junior=lambda df: df["program"].str.contains("JUNIOR"),
        program_type=lambda df: df["program"].apply(
            lambda title: "short" if "SHORT" in title else "free"
        ),
    )
)

# Sanity check: the joins must not have multiplied (or lost) score rows.
assert len(scores) == len(scores_with_context)
# Keep only non-junior rows.
is_senior = scores_with_context["is_junior"].eq(False)
# Because we are only analyzing Ice Dance in this notebook
# we limit the scope of the scoring data to those programs
is_ice_dance = scores_with_context["program"].str.contains("ICE DANCE")

# Copy so that columns added later do not write into scores_with_context.
senior_scores = scores_with_context.loc[is_senior & is_ice_dance].copy()

# Number of distinct performances left after filtering.
senior_scores["performance_id"].nunique()
392
def total_points(row):
    """Return the points represented by one scored row, rounded to 2 decimals.

    * section "elements":   base_value + judge_goe
    * section "components": factor * score

    Any other section is reported on stdout and produces None.
    """
    section = row["section"]
    if section == "components":
        return round(row["factor"] * row["score"], 2)
    if section == "elements":
        return round(row["base_value"] + row["judge_goe"], 2)
    print("Unknown section: {}".format(section))
    return None
# Compute the points for every row (row-wise, through total_points).
senior_scores["total_points"] = senior_scores.apply(total_points, axis=1)

# Rows without a computed total (null total_points) are excluded before
# grouping by performance and judge.
has_total = senior_scores["total_points"].notnull()
perf_judge_grps = senior_scores[has_total].groupby(["performance_id", "judge"])

# Points are summed over each judge's rows; every other field is taken from
# the first row of the group. Key order matches the original column list.
first_row_columns = [
    ("deductions", "total_deductions"),
    ("name", "name"),
    ("nation", "nation"),
    ("program", "program"),
    ("program_type", "program_type"),
    ("competition", "competition"),
]
judge_summary = {"points": perf_judge_grps["total_points"].sum()}
judge_summary.update(
    (target, perf_judge_grps[source].first())
    for target, source in first_row_columns
)
points_by_judge = pd.DataFrame(judge_summary).reset_index()

# Final score per judge: summed points minus the performance's deductions.
points_by_judge["final_score"] = points_by_judge["points"].sub(
    points_by_judge["deductions"]
)
points_by_judge.head()
performance_id | judge | competition | deductions | name | nation | points | program | program_type | final_score | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 00693b66b5 | J1 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.3 | ICE DANCE SHORT DANCE | short | 76.3 |
1 | 00693b66b5 | J2 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.6 | ICE DANCE SHORT DANCE | short | 76.6 |
2 | 00693b66b5 | J3 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.0 | ICE DANCE SHORT DANCE | short | 76.0 |
3 | 00693b66b5 | J4 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 77.9 | ICE DANCE SHORT DANCE | short | 77.9 |
4 | 00693b66b5 | J5 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 74.5 | ICE DANCE SHORT DANCE | short | 74.5 |
# Per performance: the sum of all judges' final scores and the number of
# judges who scored it.
perf_grps = points_by_judge.groupby(["performance_id"])
panel_score_sum = perf_grps["final_score"].sum()
panel_size = perf_grps.size()
perfs = pd.DataFrame({
    "total_points": panel_score_sum,
    "total_judges": panel_size
}).reset_index()

# Attach those per-performance totals to every judge's row.
points_with_comparison = points_by_judge.merge(
    perfs,
    how="left",
    on="performance_id",
)
points_with_comparison.head()
performance_id | judge | competition | deductions | name | nation | points | program | program_type | final_score | total_judges | total_points | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 00693b66b5 | J1 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.3 | ICE DANCE SHORT DANCE | short | 76.3 | 9 | 689.2 |
1 | 00693b66b5 | J2 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.6 | ICE DANCE SHORT DANCE | short | 76.6 | 9 | 689.2 |
2 | 00693b66b5 | J3 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.0 | ICE DANCE SHORT DANCE | short | 76.0 | 9 | 689.2 |
3 | 00693b66b5 | J4 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 77.9 | ICE DANCE SHORT DANCE | short | 77.9 | 9 | 689.2 |
4 | 00693b66b5 | J5 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 74.5 | ICE DANCE SHORT DANCE | short | 74.5 | 9 | 689.2 |
# Leave-one-out average: the mean final score of all *other* judges on the
# same performance, i.e. (panel total - this judge's score) / (judges - 1).
# Computed column-wise instead of with a per-row Python lambda. With
# vectorized division a one-judge panel yields NaN (0 / 0).
others_total = (
    points_with_comparison["total_points"] - points_with_comparison["final_score"]
)
others_count = points_with_comparison["total_judges"] - 1
points_with_comparison["avg_without_judge"] = others_total / others_count

# Positive: this judge scored the performance above the rest of the panel.
# Negative: below it.
points_with_comparison["points_vs_avg"] = (
    points_with_comparison["final_score"]
    - points_with_comparison["avg_without_judge"]
)
# Columns of the judge table needed to identify who sat in each judge slot.
judge_lookup = judges[[
    "program", "competition", "segment_category",
    "clean_judge_name", "judge_country", "clean_role"
]]

# Match each score row to a judge via program + competition + judge slot
# ("judge" on the left, "clean_role" on the right). This is a left join, so
# rows with no matching judge get a null judge_country and are then dropped.
judge_points = points_with_comparison.merge(
    judge_lookup,
    how="left",
    left_on=["program", "competition", "judge"],
    right_on=["program", "competition", "clean_role"],
).dropna(subset=["judge_country"])
judge_points.head()
performance_id | judge | competition | deductions | name | nation | points | program | program_type | final_score | total_judges | total_points | avg_without_judge | points_vs_avg | segment_category | clean_judge_name | judge_country | clean_role | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 00693b66b5 | J1 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.3 | ICE DANCE SHORT DANCE | short | 76.3 | 9 | 689.2 | 76.6125 | -0.3125 | Short Dance|Ice Dance | Tianyi ZHANG | CHN | J1 |
1 | 00693b66b5 | J2 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.6 | ICE DANCE SHORT DANCE | short | 76.6 | 9 | 689.2 | 76.5750 | 0.0250 | Short Dance|Ice Dance | Marta OLOZAGARRE | ESP | J2 |
2 | 00693b66b5 | J3 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 76.0 | ICE DANCE SHORT DANCE | short | 76.0 | 9 | 689.2 | 76.6500 | -0.6500 | Short Dance|Ice Dance | Mayumi KATO | JPN | J3 |
3 | 00693b66b5 | J4 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 77.9 | ICE DANCE SHORT DANCE | short | 77.9 | 9 | 689.2 | 76.4125 | 1.4875 | Short Dance|Ice Dance | Mark STORTON | AUS | J4 |
4 | 00693b66b5 | J5 | ISU Four Continents Championships 2017 | 0.0 | Maia SHIBUTANI / Alex SHIBUTANI | USA | 74.5 | ICE DANCE SHORT DANCE | short | 74.5 | 9 | 689.2 | 76.8375 | -2.3375 | Short Dance|Ice Dance | Andre-Marc ALLAIN | CAN | J5 |
def compare_scores_by_judge_country(skater_name):
    """Summarize, per judging country, how one team was scored versus the panel.

    Rows of the module-level ``judge_points`` table whose ``name`` equals
    *skater_name* are grouped by ``judge_country``. Each row's
    ``points_vs_avg`` is classified as above (> 0), below (< 0) or exactly
    equal to (== 0) the average of the other judges.

    Args:
        skater_name: Team name exactly as stored in ``judge_points["name"]``.

    Returns:
        A DataFrame indexed by ``judge_country`` with the columns
        ``scores_above_average``, ``scores_below_average``,
        ``scores_exactly_average``, ``total_scores``, ``pct_above_average``
        and ``pct_below_average`` (percentages rounded to 2 decimals).
        Only countries with at least five scores are kept, sorted by
        ``pct_above_average`` in descending order. A name with no rows
        produces an empty frame.
    """
    skater_scores = judge_points[judge_points["name"] == skater_name]
    diff = skater_scores["points_vs_avg"]
    country = skater_scores["judge_country"]

    def count_by_country(mask):
        # Summing a boolean mask within each group counts its True values.
        return mask.groupby(country).sum().astype("int64")

    # Each count is computed once and reused for the percentage columns.
    above = count_by_country(diff > 0)
    below = count_by_country(diff < 0)
    exact = count_by_country(diff == 0)
    total = skater_scores.groupby("judge_country").size()

    country_df = pd.DataFrame({
        "scores_above_average": above,
        "scores_below_average": below,
        "scores_exactly_average": exact,
        "total_scores": total,
        "pct_above_average": (above / total * 100).round(2),
        "pct_below_average": (below / total * 100).round(2),
    })

    # Countries with fewer than five scores are dropped from the result.
    return country_df[
        country_df["total_scores"] >= 5
    ].sort_values("pct_above_average", ascending=False)
Results for Virtue/Moir (Canada)
# Per-judge-country breakdown of scores given to Virtue/Moir relative to the other judges' average.
compare_scores_by_judge_country("Tessa VIRTUE / Scott MOIR")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
CAN | 100.00 | 0.00 | 16 | 0 | 0 | 16 |
JPN | 83.33 | 16.67 | 10 | 2 | 0 | 12 |
KOR | 83.33 | 16.67 | 5 | 1 | 0 | 6 |
GER | 71.43 | 28.57 | 5 | 2 | 0 | 7 |
CHN | 70.00 | 30.00 | 7 | 3 | 0 | 10 |
RUS | 56.25 | 43.75 | 9 | 7 | 0 | 16 |
AUS | 50.00 | 50.00 | 3 | 3 | 0 | 6 |
ESP | 30.00 | 70.00 | 3 | 7 | 0 | 10 |
ITA | 25.00 | 66.67 | 3 | 8 | 1 | 12 |
USA | 25.00 | 75.00 | 4 | 12 | 0 | 16 |
FRA | 11.11 | 88.89 | 1 | 8 | 0 | 9 |
Results for Papadakis/Cizeron (France)
# Per-judge-country breakdown of scores given to Papadakis/Cizeron relative to the other judges' average.
compare_scores_by_judge_country("Gabriella PAPADAKIS / Guillaume CIZERON")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
FRA | 92.31 | 7.69 | 12 | 1 | 0 | 13 |
USA | 85.71 | 14.29 | 12 | 2 | 0 | 14 |
CHN | 83.33 | 16.67 | 5 | 1 | 0 | 6 |
ESP | 71.43 | 28.57 | 5 | 2 | 0 | 7 |
ISR | 66.67 | 33.33 | 4 | 2 | 0 | 6 |
ITA | 50.00 | 50.00 | 5 | 5 | 0 | 10 |
JPN | 50.00 | 50.00 | 3 | 3 | 0 | 6 |
KOR | 50.00 | 50.00 | 3 | 3 | 0 | 6 |
GER | 42.86 | 57.14 | 3 | 4 | 0 | 7 |
RUS | 28.57 | 71.43 | 4 | 10 | 0 | 14 |
CZE | 22.22 | 77.78 | 2 | 7 | 0 | 9 |
CAN | 21.43 | 78.57 | 3 | 11 | 0 | 14 |
POL | 20.00 | 80.00 | 1 | 4 | 0 | 5 |
UKR | 0.00 | 100.00 | 0 | 5 | 0 | 5 |
Comparing results for the two teams
(Includes only countries whose judges scored, in the dataset, at least five performances of each team.)
# Build each team's per-country table, then line them up side by side.
canada_team = compare_scores_by_judge_country("Tessa VIRTUE / Scott MOIR")
france_team = compare_scores_by_judge_country("Gabriella PAPADAKIS / Guillaume CIZERON")
side_by_side = canada_team.join(
    france_team,
    lsuffix="_canada",
    rsuffix="_france",
)

# join() keeps every country from the first table; dropna() then removes the
# ones that have no row in the second, leaving countries present in both.
side_by_side.dropna()[["pct_above_average_canada", "pct_above_average_france"]]
pct_above_average_canada | pct_above_average_france | |
---|---|---|
judge_country | ||
CAN | 100.00 | 21.43 |
JPN | 83.33 | 50.00 |
KOR | 83.33 | 50.00 |
GER | 71.43 | 42.86 |
CHN | 70.00 | 83.33 |
RUS | 56.25 | 28.57 |
ESP | 30.00 | 71.43 |
ITA | 25.00 | 50.00 |
USA | 25.00 | 85.71 |
FRA | 11.11 | 92.31 |
Results for Bobrova/Soloviev (Russia)
# Per-judge-country breakdown of scores given to Bobrova/Soloviev relative to the other judges' average.
compare_scores_by_judge_country("Ekaterina BOBROVA / Dmitri SOLOVIEV")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
RUS | 100.00 | 0.00 | 12 | 0 | 0 | 12 |
TUR | 100.00 | 0.00 | 9 | 0 | 0 | 9 |
ESP | 71.43 | 28.57 | 5 | 2 | 0 | 7 |
ISR | 62.50 | 37.50 | 5 | 3 | 0 | 8 |
CZE | 57.14 | 42.86 | 4 | 3 | 0 | 7 |
CAN | 50.00 | 50.00 | 6 | 6 | 0 | 12 |
KOR | 50.00 | 50.00 | 3 | 3 | 0 | 6 |
FRA | 28.57 | 71.43 | 2 | 5 | 0 | 7 |
ITA | 20.00 | 80.00 | 2 | 8 | 0 | 10 |
USA | 16.67 | 83.33 | 2 | 10 | 0 | 12 |
Results for Chock/Bates (USA)
# Per-judge-country breakdown of scores given to Chock/Bates relative to the other judges' average.
compare_scores_by_judge_country("Madison CHOCK / Evan BATES")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
USA | 100.00 | 0.00 | 16 | 0 | 0 | 16 |
JPN | 87.50 | 12.50 | 7 | 1 | 0 | 8 |
CHN | 80.00 | 20.00 | 8 | 2 | 0 | 10 |
ESP | 70.00 | 30.00 | 7 | 3 | 0 | 10 |
FRA | 55.56 | 44.44 | 5 | 4 | 0 | 9 |
KOR | 50.00 | 50.00 | 4 | 4 | 0 | 8 |
GER | 40.00 | 60.00 | 2 | 3 | 0 | 5 |
ISR | 40.00 | 60.00 | 2 | 3 | 0 | 5 |
CZE | 25.00 | 75.00 | 2 | 6 | 0 | 8 |
ITA | 20.00 | 80.00 | 2 | 8 | 0 | 10 |
RUS | 18.75 | 81.25 | 3 | 13 | 0 | 16 |
CAN | 12.50 | 87.50 | 2 | 14 | 0 | 16 |
Results for Shibutani/Shibutani (USA)
# Per-judge-country breakdown of scores given to Shibutani/Shibutani relative to the other judges' average.
compare_scores_by_judge_country("Maia SHIBUTANI / Alex SHIBUTANI")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
USA | 100.00 | 0.00 | 16 | 0 | 0 | 16 |
ESP | 80.00 | 20.00 | 8 | 2 | 0 | 10 |
ITA | 80.00 | 20.00 | 8 | 2 | 0 | 10 |
CHN | 60.00 | 40.00 | 6 | 4 | 0 | 10 |
CAN | 43.75 | 56.25 | 7 | 9 | 0 | 16 |
FRA | 42.86 | 57.14 | 3 | 4 | 0 | 7 |
JPN | 40.00 | 60.00 | 4 | 6 | 0 | 10 |
KOR | 33.33 | 66.67 | 2 | 4 | 0 | 6 |
RUS | 31.25 | 68.75 | 5 | 11 | 0 | 16 |
ISR | 28.57 | 71.43 | 2 | 5 | 0 | 7 |
CZE | 16.67 | 83.33 | 1 | 5 | 0 | 6 |
GER | 0.00 | 100.00 | 0 | 5 | 0 | 5 |
TUR | 0.00 | 100.00 | 0 | 5 | 0 | 5 |
Results for Hubbell/Donohue (USA)
# Per-judge-country breakdown of scores given to Hubbell/Donohue relative to the other judges' average.
compare_scores_by_judge_country("Madison HUBBELL / Zachary DONOHUE")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
USA | 100.00 | 0.00 | 16 | 0 | 0 | 16 |
ESP | 80.00 | 20.00 | 8 | 2 | 0 | 10 |
FRA | 77.78 | 22.22 | 7 | 2 | 0 | 9 |
KOR | 66.67 | 33.33 | 4 | 2 | 0 | 6 |
CHN | 62.50 | 37.50 | 5 | 3 | 0 | 8 |
JPN | 60.00 | 40.00 | 6 | 4 | 0 | 10 |
TUR | 60.00 | 40.00 | 3 | 2 | 0 | 5 |
UKR | 60.00 | 40.00 | 3 | 2 | 0 | 5 |
CAN | 31.25 | 68.75 | 5 | 11 | 0 | 16 |
GER | 20.00 | 80.00 | 1 | 4 | 0 | 5 |
RUS | 18.75 | 81.25 | 3 | 13 | 0 | 16 |
CZE | 16.67 | 83.33 | 1 | 5 | 0 | 6 |
ISR | 0.00 | 100.00 | 0 | 7 | 0 | 7 |
ITA | 0.00 | 100.00 | 0 | 10 | 0 | 10 |
Results for Cappellini/Lanotte (Italy)
# Per-judge-country breakdown of scores given to Cappellini/Lanotte relative to the other judges' average.
compare_scores_by_judge_country("Anna CAPPELLINI / Luca LANOTTE")
pct_above_average | pct_below_average | scores_above_average | scores_below_average | scores_exactly_average | total_scores | |
---|---|---|---|---|---|---|
judge_country | ||||||
ITA | 100.00 | 0.00 | 12 | 0 | 0 | 12 |
JPN | 100.00 | 0.00 | 10 | 0 | 0 | 10 |
CHN | 75.00 | 25.00 | 6 | 2 | 0 | 8 |
UKR | 60.00 | 40.00 | 3 | 2 | 0 | 5 |
ESP | 57.14 | 42.86 | 4 | 3 | 0 | 7 |
FRA | 28.57 | 71.43 | 2 | 5 | 0 | 7 |
CAN | 25.00 | 75.00 | 3 | 9 | 0 | 12 |
RUS | 25.00 | 75.00 | 3 | 9 | 0 | 12 |
ISR | 16.67 | 83.33 | 1 | 5 | 0 | 6 |
KOR | 16.67 | 83.33 | 1 | 5 | 0 | 6 |
USA | 16.67 | 83.33 | 2 | 10 | 0 | 12 |
Comparing results for the four teams
# The four teams and the four judging countries being cross-tabulated.
selected_teams = [
    "Ekaterina BOBROVA / Dmitri SOLOVIEV",
    "Madison CHOCK / Evan BATES",
    "Maia SHIBUTANI / Alex SHIBUTANI",
    "Anna CAPPELLINI / Luca LANOTTE"
]
selected_countries = ["ITA", "RUS", "USA", "JPN"]

team_scores = judge_points[
    judge_points["name"].isin(selected_teams)
    & judge_points["judge_country"].isin(selected_countries)
]

# One group per (judging country, team) pair.
grps = team_scores.groupby(["judge_country", "name"])
above_counts = grps.apply(lambda grp: len(grp[grp["points_vs_avg"] > 0]))
group_sizes = grps.size()

# unstack() moves the team names into the columns, leaving one row per
# judging country.
team_df = pd.DataFrame({
    "scores_above_average": above_counts,
    "total_scores": group_sizes,
    "pct_above_average": round(above_counts / group_sizes * 100, 2)
}).unstack()

# Order the countries by how often they scored Bobrova/Soloviev above average.
sort_column = ("pct_above_average", "Ekaterina BOBROVA / Dmitri SOLOVIEV")
team_df.sort_values(sort_column, ascending=False)
pct_above_average | scores_above_average | total_scores | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
name | Anna CAPPELLINI / Luca LANOTTE | Ekaterina BOBROVA / Dmitri SOLOVIEV | Madison CHOCK / Evan BATES | Maia SHIBUTANI / Alex SHIBUTANI | Anna CAPPELLINI / Luca LANOTTE | Ekaterina BOBROVA / Dmitri SOLOVIEV | Madison CHOCK / Evan BATES | Maia SHIBUTANI / Alex SHIBUTANI | Anna CAPPELLINI / Luca LANOTTE | Ekaterina BOBROVA / Dmitri SOLOVIEV | Madison CHOCK / Evan BATES | Maia SHIBUTANI / Alex SHIBUTANI |
judge_country | ||||||||||||
RUS | 25.00 | 100.00 | 18.75 | 31.25 | 3 | 12 | 3 | 5 | 12 | 12 | 16 | 16 |
JPN | 100.00 | 25.00 | 87.50 | 40.00 | 10 | 1 | 7 | 4 | 10 | 4 | 8 | 10 |
ITA | 100.00 | 20.00 | 20.00 | 80.00 | 12 | 2 | 2 | 8 | 12 | 10 | 10 | 10 |
USA | 16.67 | 16.67 | 100.00 | 100.00 | 2 | 2 | 16 | 16 | 12 | 12 | 16 | 16 |