import json, glob
from pathlib import Path


def _load_json(path):
    """Parse and return the JSON document stored at *path*.

    The file is opened in binary mode so that ``json`` detects the encoding
    (UTF-8/16/32) itself instead of relying on the platform's locale default,
    and the ``with`` block guarantees the handle is closed afterwards.
    """
    with open(path, 'rb') as handle:
        return json.load(handle)


# Catalogue of all known games, read from a single file.
games = _load_json('games.json')

# Maps a game's slug (the review file's base name) to that file's parsed content.
reviews = {}
for filepath in glob.glob('reviews/*.json'):
    # Path.stem drops the directory and the final ".json" whatever the
    # platform's path separator is, so the key is always the bare slug.
    game = Path(filepath).stem
    reviews[game] = _load_json(filepath)

print('games', len(games))
print('reviewed games', len(reviews))
def similarity(game1, game2):
    """Return the Jaccard distance between the reviewer sets of two games.

    Despite the name, smaller values mean the games are *more* alike: with
    ``A`` and ``B`` the sets of reviewer usernames found in
    ``reviews[game1]`` and ``reviews[game2]``, the result is
    ``1 - len(A & B) / len(A | B)``.

    Args:
        game1: Key into the module-level ``reviews`` mapping.
        game2: Key into the module-level ``reviews`` mapping.

    Returns:
        ``0.0`` when both games have exactly the same reviewers, ``1.0`` when
        they share none, and the int ``1`` when neither has any reviewer.

    Raises:
        KeyError: If either game is missing from ``reviews``.
    """
    first_reviewers = {entry['reviewer']['username'] for entry in reviews[game1]}
    second_reviewers = {entry['reviewer']['username'] for entry in reviews[game2]}
    everyone = first_reviewers | second_reviewers
    if not everyone:
        # Nobody reviewed either game: avoid 0/0 and report maximal distance.
        return 1
    shared = first_reviewers & second_reviewers
    return 1 - len(shared) / len(everyone)
def _ranked_neighbours(target):
    """Return ``(other_game, score)`` pairs for *target*, lowest score first.

    Every key of ``reviews`` except *target* itself is scored with
    ``similarity`` and the pairs are sorted ascending by that score.
    """
    scored = [
        (candidate, similarity(target, candidate))
        for candidate in reviews
        if candidate != target
    ]
    scored.sort(key=lambda pair: pair[1])
    return scored


# For the ten games with the most reviews, show up to three closest games.
most_reviewed = sorted(reviews, key=lambda slug: len(reviews[slug]), reverse=True)
for game in most_reviewed[:10]:
    sims = _ranked_neighbours(game)
    print(game)
    for other_game, sim in sims[:3]:
        # Only list candidates whose score is strictly below 1.
        if sim < 1:
            print(' >', other_game, sim)

# Closer look at one hand-picked title: its twenty closest games.
game = 'red_faction'
sims = _ranked_neighbours(game)
print(game)
for other_game, sim in sims[:20]:
    print(' >', other_game, sim)
import umap
import umap.plot
def metric(a, b):
    """Distance callback for UMAP: compare two encoded game rows.

    Each argument is indexed at position 0 to obtain a game id, which is cast
    to ``int`` and translated to a slug through ``id_to_slug`` before the two
    slugs are handed to ``similarity``.
    NOTE(review): the ``int()`` cast presumably exists because UMAP passes the
    ids back as floats -- confirm.

    Args:
        a: Row whose first element is the id of the first game.
        b: Row whose first element is the id of the second game.

    Returns:
        Whatever ``similarity`` returns for the two corresponding slugs.
    """
    first_slug = id_to_slug[int(a[0])]
    second_slug = id_to_slug[int(b[0])]
    return similarity(first_slug, second_slug)
# Sorted list of every game that has reviews; a game's position in this list
# doubles as its numeric id in the data handed to UMAP.
games_reviewed = sorted(reviews)
id_to_slug = dict(enumerate(games_reviewed))
# One single-element row per game, holding just that id.
games_ids = [[index] for index in range(len(games_reviewed))]

# Look up each reviewed game's catalogue entry to fetch its category label.
slug_to_game = {entry['slug']: entry for entry in games}
categories = [slug_to_game[name]['category'] for name in games_reviewed]

# Fit the embedding using the custom ``metric`` callback defined in this file.
mapper = umap.UMAP(metric=metric, random_state=30).fit(games_ids)

umap.plot.output_notebook()
p = umap.plot.interactive(
    mapper,
    labels=categories,
    hover_data={"category": categories, 'slugs': games_reviewed},
)
umap.plot.show(p)