Game recommendation on GOG.com

Loading data

In [44]:
import json, glob

# Load the game catalogue. The context manager closes the file handle as soon
# as parsing is done instead of leaving it to the garbage collector.
with open('games.json', encoding='utf-8') as games_file:
    games = json.load(games_file)

# Map each game slug (the review file name without directory and extension)
# to the parsed content of its review file.
reviews = {}
# glob returns paths in filesystem order, which varies between machines.
# Sorting fixes the insertion order of `reviews`, so later stable sorts break
# ties between equally distant games the same way on every run.
for filepath in sorted(glob.glob('reviews/*.json')):
    game = filepath.replace('reviews/', '').replace('.json', '')
    with open(filepath, encoding='utf-8') as reviews_file:
        reviews[game] = json.load(reviews_file)

print('games', len(games))
print('reviewed games', len(reviews))
games 3778
reviewed games 2256

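Similarity metric (Jaccard distance between the sets of reviewers: 0 = same reviewers, 1 = no reviewer in common, so lower means more similar)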

In [ ]:
def similarity(game1, game2):
    """Return the Jaccard distance between the reviewers of two games.

    Despite the name, the result is a *distance*: 0 means both games were
    reviewed by exactly the same users, 1 means they share no reviewer.
    Callers therefore sort ascending to get the most similar games first.

    Args:
        game1: key of the first game in the global ``reviews`` mapping.
        game2: key of the second game in the global ``reviews`` mapping.

    Returns:
        ``1 - |A & B| / |A | B|`` where A and B are the sets of reviewer
        usernames of each game, or the integer ``1`` when neither game has
        any reviewer.

    Raises:
        KeyError: if either game is not a key of ``reviews``.
    """
    def reviewers(game):
        # Usernames are de-duplicated: several reviews by one user count once.
        return {entry['reviewer']['username'] for entry in reviews[game]}

    first, second = reviewers(game1), reviewers(game2)
    everyone = first | second

    # No reviewer at all: avoid dividing by zero and report maximal distance.
    if not everyone:
        return 1

    shared = first & second
    return 1 - len(shared) / len(everyone)
In [42]:
# The ten games with the most reviews, most reviewed first. The sort is
# stable, so games with equal review counts keep the order of `reviews`.
most_reviewed = sorted(reviews, key=lambda slug: -len(reviews[slug]))[:10]

for game in most_reviewed:
    # Distance from this game to every other reviewed game, closest first.
    sims = sorted(
        ((other_game, similarity(game, other_game)) for other_game in reviews if other_game != game),
        key=lambda pair: pair[1],
    )
    print(game)
    for other_game, sim in sims[:3]:
        # A distance of 1 means no shared reviewer: not worth listing.
        if sim >= 1:
            continue
        print('  >', other_game, sim)
diablo
  > warcraft_2_battlenet_edition 0.9855855855855856
  > elder_scrolls_iv_oblivion_game_of_the_year_edition_deluxe_the 0.9897260273972602
  > blade_runner 0.990521327014218
firewatch
  > what_remains_of_edith_finch 0.9571428571428572
  > the_vanishing_of_ethan_carter 0.9788732394366197
  > gone_home 0.9804560260586319
legend_of_grimrock
  > legend_of_grimrock_2 0.9655172413793104
  > the_book_of_unwritten_tales 0.9795081967213115
  > gothic_3 0.9857142857142858
elex
  > seven_the_days_long_gone 0.9786324786324786
  > divinity_original_sin_enhanced_edition 0.9805194805194806
  > kingdom_come_deliverance 0.9821428571428571
deus_ex
  > deus_ex_invisible_war 0.937037037037037
  > system_shock_2 0.9742268041237113
  > star_wars_knights_of_the_old_republic 0.9787234042553191
dungeon_keeper
  > dungeon_keeper_2 0.9769820971867008
  > jade_empire_special_edition 0.9844236760124611
  > nox 0.9870466321243523
dungeon_keeper_2
  > dungeon_keeper 0.9769820971867008
  > jade_empire_special_edition 0.9844236760124611
  > theme_hospital 0.9872773536895675
total_anihilation_commander_pack
  > total_annihilation_kingdoms 0.9723320158102767
  > dark_reign_expansion 0.9834710743801653
  > infested_planet 0.9855072463768116
the_witcher
  > dragon_age_origins 0.9821882951653944
  > vampire_the_masquerade_bloodlines 0.9840425531914894
  > alan_wake 0.9873015873015873
sid_meiers_alpha_centauri
  > wing_commander_4_the_price_of_freedom 0.9881422924901185
  > wing_commander_3_heart_of_the_tiger 0.9891696750902527
  > heroes_of_might_and_magic_5_bundle 0.9894366197183099

Top 20 most similar games to Red Faction (old-school FPS)

In [45]:
game = 'red_faction'

# Distance from the chosen game to every other reviewed game, closest first
# (`similarity` returns a distance, so ascending order = most similar first).
sims = sorted(
    ((candidate, similarity(game, candidate)) for candidate in reviews if candidate != game),
    key=lambda pair: pair[1],
)

print(game)
for candidate, distance in sims[:20]:
    print('  >', candidate, distance)
red_faction
  > red_faction_2 0.8867924528301887
  > stalker_clear_sky 0.9565217391304348
  > serious_sam_the_first_encounter 0.9634146341463414
  > sea_dogs 0.9672131147540983
  > call_of_juarez 0.967741935483871
  > serious_sam_the_second_encounter 0.967741935483871
  > terminal_velocity 0.9682539682539683
  > hogs_of_war 0.9692307692307692
  > syndicate_wars 0.9714285714285714
  > quake_4 0.971830985915493
  > tomb_raider_the_angel_of_darkness 0.9722222222222222
  > abandon_ship 0.975609756097561
  > indiana_jones_and_the_emperors_tomb 0.9759036144578314
  > sniper_ghost_warrior_3 0.9761904761904762
  > delta_force_land_warrior 0.9767441860465116
  > star_wolves_3_civil_war 0.9767441860465116
  > unholy_heights 0.9777777777777777
  > judge_dredd_dredd_vs_death 0.9777777777777777
  > brothers_in_arms_hells_highway 0.9782608695652174
  > wing_commander_armada 0.9782608695652174

Plotting the reviewed games on a 2D map (UMAP projection of the reviewer-overlap distance, coloured by category)

In [43]:
import umap
import umap.plot

def metric(a, b):
    """Distance callback handed to UMAP: compare two games given as id rows.

    Each argument is a one-element row whose first item is a game id; the id
    is converted to ``int`` and looked up in the global ``id_to_slug`` to get
    the game slug, then both slugs are passed to ``similarity``.

    Args:
        a: row whose element 0 is the id of the first game.
        b: row whose element 0 is the id of the second game.

    Returns:
        Whatever ``similarity`` returns for the two corresponding slugs.
    """
    slug_a = id_to_slug[int(a[0])]
    slug_b = id_to_slug[int(b[0])]
    return similarity(slug_a, slug_b)


# Alphabetical list of every game that has reviews; a game's position in this
# list is the integer id used to refer to it below.
games_reviewed = sorted(reviews)
# Each game is fed to UMAP as a one-element row holding only its id; `metric`
# reads that id back from position 0 and resolves it to a slug.
games_ids = [[game_id] for game_id in range(len(games_reviewed))]
slug_to_game = {entry['slug']: entry for entry in games}
id_to_slug = dict(enumerate(games_reviewed))
# Category of each reviewed game, aligned with `games_reviewed`.
categories = [slug_to_game[slug]['category'] for slug in games_reviewed]

# Fixed random_state so the projection is the same on every run.
reducer = umap.UMAP(metric=metric, random_state=30)
mapper = reducer.fit(games_ids)

umap.plot.output_notebook()
p = umap.plot.interactive(
    mapper,
    labels=categories,
    hover_data={"category": categories, 'slugs': games_reviewed},
)
umap.plot.show(p)
/home/damien/.venv/lib/python3.6/site-packages/umap/umap_.py:1495: UserWarning: custom distance metric does not return gradient; inverse_transform will be unavailable. To enable using inverse_transform method method, define a distance function that returns a tuple of (distance [float], gradient [np.array])
  "custom distance metric does not return gradient; inverse_transform will be unavailable. "
Loading BokehJS ...
In [ ]: