import math
import operator
# Hand-built sample data: critic name -> {movie title -> rating}.
# Ratings are floats; the values present range from 0.63 to 5.0.
# Key order is kept as authored because downstream helpers iterate these
# dicts and return results in that order.
review = {
    'Marlon Brando': {
        'The Godfather': 5.0,
        'The Godfather Part II': 4.29,
        'Apocalypse Now': 5.0,
        # NOTE(review): originally written as `1.` -- confirm 1.0 is the
        # intended rating and not a truncated value.
        'Jaws': 1.0,
    },
    'Stephen King': {
        'The Shawshank Redemption': 4.89,
        'The Shining': 4.93,
        'The Green Mile': 4.87,
        'The Godfather': 1.33,
    },
    'Steven Spielberg': {
        'Raiders of the Lost Ark': 5.0,
        'Jaws': 4.89,
        'Saving Private Ryan': 4.78,
        'Star Wars Episode IV - A New Hope': 4.33,
        'Close Encounters of the Third Kind': 4.77,
        'The Godfather': 1.25,
        'The Godfather Part II': 1.72,
    },
    'George Lucas': {
        'Star Wars Episode IV - A New Hope': 5.0,
    },
    'Al Pacino': {
        'The Godfather': 4.02,
        'The Godfather Part II': 5.0,
    },
    'Robert DeNiro': {
        'The Godfather': 3.07,
        'The Godfather Part II': 4.29,
        'Raging Bull': 5.0,
        'Goodfellas': 4.89,
    },
    'Robert Duvall': {
        'The Godfather': 3.8,
        'The Godfather Part II': 3.61,
        'Apocalypse Now': 4.26,
    },
    'Jack Nicholson': {
        'The Shining': 5.0,
        'One Flew Over The Cuckoos Nest': 5.0,
        'The Godfather': 2.22,
        'The Godfather Part II': 3.34,
    },
    'Morgan Freeman': {
        'The Shawshank Redemption': 4.98,
        'The Shining': 4.42,
        'Apocalypse Now': 1.63,
        'The Godfather': 1.12,
        'The Godfather Part II': 2.16,
    },
    'Harrison Ford': {
        'Raiders of the Lost Ark': 5.0,
        'Star Wars Episode IV - A New Hope': 4.84,
    },
    'Tom Hanks': {
        'Saving Private Ryan': 3.78,
        'The Green Mile': 4.96,
        'The Godfather': 1.04,
        'The Godfather Part II': 1.03,
    },
    'Francis Ford Coppola': {
        'The Godfather': 5.0,
        'The Godfather Part II': 5.0,
        'Jaws': 1.24,
        'One Flew Over The Cuckoos Nest': 2.02,
    },
    'Martin Scorsese': {
        'Raging Bull': 5.0,
        'Goodfellas': 4.87,
        'Close Encounters of the Third Kind': 1.14,
        'The Godfather': 4.0,
    },
    'Diane Keaton': {
        'The Godfather': 2.98,
        'The Godfather Part II': 3.93,
        'Close Encounters of the Third Kind': 1.37,
    },
    'Richard Dreyfuss': {
        'Jaws': 5.0,
        'Close Encounters of the Third Kind': 5.0,
        'The Godfather': 1.07,
        'The Godfather Part II': 0.63,
    },
    'Joe Pesci': {
        'Raging Bull': 4.89,
        'Goodfellas': 5.0,
        'The Godfather': 4.87,
        'Star Wars Episode IV - A New Hope': 1.32,
    },
}
# Movies that two critics have both rated.
def get_common_movies(criticA, criticB):
    """Return the titles reviewed by both critics.

    Titles come back in the order they appear in criticA's entry of the
    module-level `review` dict.

    Raises:
        KeyError: If criticA is not a key of `review`; likewise for criticB
            as soon as the first title is checked against it.
    """
    shared = []
    for title in review[criticA]:
        if title in review[criticB]:
            shared.append(title)
    return shared
# Example calls. The bare list literal after each call appears to be the
# notebook output recorded for it; as a statement it is evaluated and
# discarded. Each list matches what the call returns for the `review` data.
get_common_movies('Marlon Brando','Robert DeNiro')
['The Godfather', 'The Godfather Part II']
get_common_movies('Steven Spielberg','Tom Hanks')
['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']
get_common_movies('Martin Scorsese','Joe Pesci')
['Raging Bull', 'Goodfellas', 'The Godfather']
# Paired ratings for the movies two critics have in common.
def get_reviews(criticA, criticB):
    """Return one (criticA rating, criticB rating) tuple per shared movie.

    The tuples follow the order produced by get_common_movies. The list is
    empty when the critics share no movie.
    """
    pairs = []
    for title in get_common_movies(criticA, criticB):
        pairs.append((review[criticA][title], review[criticB][title]))
    return pairs
# Example calls. The bare list literal after each call appears to be the
# notebook output recorded for it; as a statement it is evaluated and
# discarded. Each tuple is (first critic's rating, second critic's rating).
get_reviews('Marlon Brando','Robert DeNiro')
[(5.0, 3.07), (4.29, 4.29)]
get_reviews('Steven Spielberg','Tom Hanks')
[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]
get_reviews('Martin Scorsese','Joe Pesci')
[(5.0, 4.89), (4.87, 5.0), (4.0, 4.87)]
# $d(x,y)=\sqrt{(x_1-y_1)^2 + (x_2-y_2)^2 + \cdots + (x_n-y_n)^2}$
# Euclidean distance over a list of paired values.
def euclidean_distance(points):
    """Return the square root of the summed squared pair differences.

    Args:
        points: Iterable of indexable items; only elements 0 and 1 of each
            item are used.

    Returns:
        The distance as a float; 0.0 for an empty iterable.
    """
    gaps = (pair[0] - pair[1] for pair in points)
    return math.sqrt(sum(gap ** 2 for gap in gaps))
# Turn a distance into a similarity score: the smaller the distance between
# two sets of ratings, the higher the score.
# The 1 in the denominator keeps the division defined when the distance is 0.
def similarity(reviews):
    """Return 1 / (1 + Euclidean distance of the rating pairs).

    Identical ratings give 1.0, and larger distances give values closer to 0.
    An empty `reviews` list has distance 0 and therefore also gives 1.0.
    """
    distance = euclidean_distance(reviews)
    return 1 / (1 + distance)
# Similarity score between two critics.
def get_critic_similarity(criticA, criticB):
    """Score how closely two critics agree on the movies both have rated.

    The paired ratings from get_reviews are passed straight to similarity().
    Critics with no movie in common produce an empty pair list, which
    similarity() scores as 1.0.
    """
    return similarity(get_reviews(criticA, criticB))
# Example calls. The bare float literal after each call appears to be the
# notebook output recorded for it; as a statement it is evaluated and
# discarded.
get_critic_similarity('Marlon Brando','Robert DeNiro')
0.341296928327645
get_critic_similarity('Steven Spielberg','Tom Hanks')
0.4478352722730117
get_critic_similarity('Martin Scorsese','Joe Pesci')
0.5300793497254199
# Recommend unseen movies to a critic from the ratings of similar critics.
def recommend_movies(critic, num_suggestions):
    """Rank movies `critic` has not reviewed by similarity-weighted rating.

    Only critics who share at least one movie with `critic` are considered.
    The `num_suggestions` most similar of them are consulted, and every movie
    they rated that `critic` has not is given a predicted rating: the
    similarity-weighted average of their ratings for it.

    Args:
        critic: Key of the module-level `review` dict to recommend for.
        num_suggestions: Number of most-similar critics to consult. It caps
            the neighbours used, not the number of movies returned.

    Returns:
        A list of (movie, predicted_rating) tuples sorted by predicted rating,
        highest first. Empty when no other critic shares a movie with
        `critic`, or when the consulted critics rated nothing new.

    Raises:
        KeyError: If `critic` is not a key of `review`.
    """
    already_rated = review[critic]

    # A critic with no movie in common yields an empty rating list, distance 0
    # and hence the maximum score of 1.0. Without the overlap check these
    # strangers would outrank every critic there is actual evidence about.
    scored = [
        (get_critic_similarity(critic, other), other)
        for other in review
        if other != critic and get_common_movies(critic, other)
    ]
    # Tuples sort by score first, then by name, so the order is deterministic.
    nearest = sorted(scored, reverse=True)[:num_suggestions]

    # movie -> (sum of neighbour scores, [score * rating for each neighbour])
    tallies = {}
    for score, other in nearest:
        for movie, rating in review[other].items():
            if movie in already_rated:
                continue
            weighted = score * rating
            if movie in tallies:
                score_sum, weights = tallies[movie]
                tallies[movie] = (score_sum + score, weights + [weighted])
            else:
                tallies[movie] = (score, [weighted])

    # Normalise by the total score so a movie rated by many neighbours is not
    # favoured merely for being popular. Scores are 1 / (1 + distance), hence
    # strictly positive, so the divisor is never zero.
    predictions = {
        movie: sum(weights) / score_sum
        for movie, (score_sum, weights) in tallies.items()
    }
    return sorted(predictions.items(), key=operator.itemgetter(1), reverse=True)
# Example calls. The bare list literal after each call appears to be the
# notebook output recorded from an earlier run; as a statement it is evaluated
# and discarded. Re-run the calls to confirm the values against the current
# code and data.
recommend_movies('Marlon Brando',4)
[('Goodfellas', 5.000000000000001), ('Raiders of the Lost Ark', 5.0), ('Raging Bull', 4.89), ('Star Wars Episode IV - A New Hope', 3.8157055214723923), ('One Flew Over The Cuckoos Nest', 2.02)]
recommend_movies('Robert DeNiro',4)
[('Raiders of the Lost Ark', 5.0), ('Star Wars Episode IV - A New Hope', 4.92), ('Close Encounters of the Third Kind', 1.2744773851327365)]
recommend_movies('Steven Spielberg',4)
[('The Shawshank Redemption', 4.928285762244913), ('The Green Mile', 4.87), ('The Shining', 4.71304734727882), ('Apocalypse Now', 1.63)]
recommend_movies('Tom Hanks',4)
[('Raiders of the Lost Ark', 5.0), ('Jaws', 5.0), ('Close Encounters of the Third Kind', 5.0), ('The Shining', 4.93), ('Star Wars Episode IV - A New Hope', 4.92), ('The Shawshank Redemption', 4.89)]
recommend_movies('Martin Scorsese',4)
[('Raiders of the Lost Ark', 5.0), ('Star Wars Episode IV - A New Hope', 4.92), ('The Godfather Part II', 4.3613513513513515), ('Apocalypse Now', 4.26)]
recommend_movies('Joe Pesci',4)
[('Apocalypse Now', 5.000000000000001), ('The Godfather Part II', 4.7280538302277435), ('One Flew Over The Cuckoos Nest', 2.02), ('Close Encounters of the Third Kind', 1.14), ('Jaws', 1.12)]