import json, glob
# Load the game catalogue. The context manager closes the file as soon as it
# has been parsed instead of leaving the handle to the garbage collector.
with open('games.json') as games_file:
    games = json.load(games_file)

# Map each game slug (the review file's path with the 'reviews/' prefix and
# the '.json' suffix removed) to the parsed contents of that file.
reviews = {}
for filepath in glob.glob('reviews/*.json'):
    game = filepath.replace('reviews/', '').replace('.json', '')
    with open(filepath) as review_file:
        reviews[game] = json.load(review_file)

print('games', len(games))
print('reviewed games', len(reviews))
games 3778 reviewed games 2256
def similarity(game1, game2):
    """Return the Jaccard distance between the reviewer sets of two games.

    Despite the name, the result is a distance: 0 means both games were
    reviewed by exactly the same usernames, and 1 means they share no
    reviewer at all.

    Args:
        game1: Key into the module-level ``reviews`` mapping.
        game2: Key into the module-level ``reviews`` mapping.

    Returns:
        ``1 - |A & B| / |A | B|`` where A and B are the sets of reviewer
        usernames for each game, or 1 when neither game has any reviewer.

    Raises:
        KeyError: If either game is missing from ``reviews``.
    """
    game1_users = {review['reviewer']['username'] for review in reviews[game1]}
    game2_users = {review['reviewer']['username'] for review in reviews[game2]}
    # Build the union once; it serves as both the emptiness check and the
    # denominator.
    all_users = game1_users | game2_users
    if not all_users:
        # No reviewers on either side: treat the games as maximally distant
        # rather than dividing by zero.
        return 1
    return 1 - len(game1_users & game2_users) / len(all_users)
# For the ten games with the most reviews, print the game followed by up to
# three other games with the smallest similarity() value (only those below 1).
by_review_count = sorted(reviews.items(), key=lambda item: -len(item[1]))
for game, _ in by_review_count[:10]:
    sims = sorted(
        (
            (other_game, similarity(game, other_game))
            for other_game in reviews
            if other_game != game
        ),
        key=lambda pair: pair[1],
    )
    print(game)
    for other_game, sim in sims[:3]:
        if sim < 1:
            print(' >', other_game, sim)
diablo > warcraft_2_battlenet_edition 0.9855855855855856 > elder_scrolls_iv_oblivion_game_of_the_year_edition_deluxe_the 0.9897260273972602 > blade_runner 0.990521327014218 firewatch > what_remains_of_edith_finch 0.9571428571428572 > the_vanishing_of_ethan_carter 0.9788732394366197 > gone_home 0.9804560260586319 legend_of_grimrock > legend_of_grimrock_2 0.9655172413793104 > the_book_of_unwritten_tales 0.9795081967213115 > gothic_3 0.9857142857142858 elex > seven_the_days_long_gone 0.9786324786324786 > divinity_original_sin_enhanced_edition 0.9805194805194806 > kingdom_come_deliverance 0.9821428571428571 deus_ex > deus_ex_invisible_war 0.937037037037037 > system_shock_2 0.9742268041237113 > star_wars_knights_of_the_old_republic 0.9787234042553191 dungeon_keeper > dungeon_keeper_2 0.9769820971867008 > jade_empire_special_edition 0.9844236760124611 > nox 0.9870466321243523 dungeon_keeper_2 > dungeon_keeper 0.9769820971867008 > jade_empire_special_edition 0.9844236760124611 > theme_hospital 0.9872773536895675 total_anihilation_commander_pack > total_annihilation_kingdoms 0.9723320158102767 > dark_reign_expansion 0.9834710743801653 > infested_planet 0.9855072463768116 the_witcher > dragon_age_origins 0.9821882951653944 > vampire_the_masquerade_bloodlines 0.9840425531914894 > alan_wake 0.9873015873015873 sid_meiers_alpha_centauri > wing_commander_4_the_price_of_freedom 0.9881422924901185 > wing_commander_3_heart_of_the_tiger 0.9891696750902527 > heroes_of_might_and_magic_5_bundle 0.9894366197183099
# Print the 20 games with the smallest similarity() value relative to one
# hand-picked title.
game = 'red_faction'
sims = sorted(
    [
        (other_game, similarity(game, other_game))
        for other_game in reviews
        if other_game != game
    ],
    key=lambda pair: pair[1],
)
print(game)
for other_game, sim in sims[:20]:
    print(' >', other_game, sim)
red_faction > red_faction_2 0.8867924528301887 > stalker_clear_sky 0.9565217391304348 > serious_sam_the_first_encounter 0.9634146341463414 > sea_dogs 0.9672131147540983 > call_of_juarez 0.967741935483871 > serious_sam_the_second_encounter 0.967741935483871 > terminal_velocity 0.9682539682539683 > hogs_of_war 0.9692307692307692 > syndicate_wars 0.9714285714285714 > quake_4 0.971830985915493 > tomb_raider_the_angel_of_darkness 0.9722222222222222 > abandon_ship 0.975609756097561 > indiana_jones_and_the_emperors_tomb 0.9759036144578314 > sniper_ghost_warrior_3 0.9761904761904762 > delta_force_land_warrior 0.9767441860465116 > star_wolves_3_civil_war 0.9767441860465116 > unholy_heights 0.9777777777777777 > judge_dredd_dredd_vs_death 0.9777777777777777 > brothers_in_arms_hells_highway 0.9782608695652174 > wing_commander_armada 0.9782608695652174
import umap
import umap.plot
def metric(a, b):
    """Custom metric callable handed to ``umap.UMAP``.

    The first element of each argument is cast to ``int`` and used as a key
    into ``id_to_slug``; the two resulting slugs are then compared with
    ``similarity``.

    Args:
        a: Indexable whose element 0 is a key of ``id_to_slug`` after
            ``int()`` conversion.
        b: Same as ``a``, for the second point.

    Returns:
        Whatever ``similarity`` returns for the two looked-up slugs.
    """
    slug_a = id_to_slug[int(a[0])]
    slug_b = id_to_slug[int(b[0])]
    return similarity(slug_a, slug_b)
# Slugs of every reviewed game in sorted order; a slug's position in this
# list is the integer id used for the embedding.
games_reviewed = sorted(reviews)

# One single-element row per game, holding only that game's integer id.
games_ids = [[index] for index in range(len(games_reviewed))]
id_to_slug = dict(enumerate(games_reviewed))

# Look up each reviewed game's catalogue entry to get its category label.
slug_to_game = {entry['slug']: entry for entry in games}
categories = [slug_to_game[slug]['category'] for slug in games_reviewed]

# Fit UMAP on the ids using the custom metric defined in this file.
reducer = umap.UMAP(metric=metric, random_state=30)
mapper = reducer.fit(games_ids)

# Render the interactive plot inline, labelled by category and showing the
# category and slug in the hover data.
umap.plot.output_notebook()
hover_info = {"category": categories, 'slugs': games_reviewed}
p = umap.plot.interactive(mapper, labels=categories, hover_data=hover_info)
umap.plot.show(p)
/home/damien/.venv/lib/python3.6/site-packages/umap/umap_.py:1495: UserWarning: custom distance metric does not return gradient; inverse_transform will be unavailable. To enable using inverse_transform method method, define a distance function that returns a tuple of (distance [float], gradient [np.array]) "custom distance metric does not return gradient; inverse_transform will be unavailable. "