Importing Libraries and Loading Data

In [1]:
import math
import operator

#Building Custom Data for Movie Rating
review = {
'Marlon Brando': {
'The Godfather': 5.00, 
'The Godfather Part II': 4.29,
'Apocalypse Now': 5.00, 
'Jaws': 1.
},
'Stephen King': {
'The Shawshank Redemption': 4.89, 
'The Shining': 4.93 , 
'The Green Mile': 4.87,
'The Godfather': 1.33,
},
'Steven Spielberg': {
'Raiders of the Lost Ark': 5.0, 
'Jaws': 4.89,
'Saving Private Ryan': 4.78, 
'Star Wars Episode IV - A New Hope': 4.33,
'Close Encounters of the Third Kind': 4.77,
'The Godfather':  1.25,
'The Godfather Part II': 1.72
},
'George Lucas':{
'Star Wars Episode IV - A New Hope': 5.00	
},
'Al Pacino': {
'The Godfather': 4.02, 
'The Godfather Part II': 5.00,
},
'Robert DeNiro': {
'The Godfather': 3.07, 
'The Godfather Part II': 4.29, 
'Raging Bull': 5.00, 
'Goodfellas':  4.89
},
'Robert Duvall': {
'The Godfather': 3.80, 
'The Godfather Part II': 3.61,
'Apocalypse Now': 4.26 
},
'Jack Nicholson': {
'The Shining': 5.0,
'One Flew Over The Cuckoos Nest': 5.0,
'The Godfather': 2.22,
'The Godfather Part II': 3.34
},
'Morgan Freeman': {
'The Shawshank Redemption': 4.98,
'The Shining': 4.42,
'Apocalypse Now': 1.63,
'The Godfather': 1.12,
'The Godfather Part II': 2.16
},
'Harrison Ford': {
'Raiders of the Lost Ark': 5.0, 
'Star Wars Episode IV - A New Hope': 4.84,
},
'Tom Hanks': {
'Saving Private Ryan': 3.78, 
'The Green Mile': 4.96,
'The Godfather': 1.04,
'The Godfather Part II': 1.03
},
'Francis Ford Coppola': {
'The Godfather': 5.00, 
'The Godfather Part II': 5.0, 
'Jaws': 1.24,
'One Flew Over The Cuckoos Nest': 2.02
},
'Martin Scorsese': {
'Raging Bull': 5.0, 
'Goodfellas': 4.87,
'Close Encounters of the Third Kind': 1.14,
'The Godfather': 4.00
},
'Diane Keaton': {
'The Godfather': 2.98,
'The Godfather Part II': 3.93,
'Close Encounters of the Third Kind': 1.37
},
'Richard Dreyfuss': {
'Jaws': 5.0, 
'Close Encounters of the Third Kind': 5.0,
'The Godfather': 1.07,
'The Godfather Part II': 0.63
},
'Joe Pesci': {
'Raging Bull': 4.89, 
'Goodfellas': 5.0,
'The Godfather': 4.87,
'Star Wars Episode IV - A New Hope': 1.32
}
}
In [2]:
# Function to get common movies b/w Users
def get_common_movies(criticA,criticB):
    return [movie for movie in review[criticA] if movie in review[criticB]]
In [3]:
get_common_movies('Marlon Brando','Robert DeNiro')
Out[3]:
['The Godfather', 'The Godfather Part II']
In [4]:
get_common_movies('Steven Spielberg','Tom Hanks')
Out[4]:
['Saving Private Ryan', 'The Godfather', 'The Godfather Part II']
In [5]:
get_common_movies('Martin Scorsese','Joe Pesci')
Out[5]:
['Raging Bull', 'Goodfellas', 'The Godfather']
In [6]:
# Function to get reviews from the common movies
def get_reviews(criticA,criticB):
    common_movies = get_common_movies(criticA,criticB)
    return [(review[criticA][movie], review[criticB][movie]) for movie in common_movies]
In [7]:
get_reviews('Marlon Brando','Robert DeNiro')
Out[7]:
[(5.0, 3.07), (4.29, 4.29)]
In [8]:
get_reviews('Steven Spielberg','Tom Hanks')
Out[8]:
[(4.78, 3.78), (1.25, 1.04), (1.72, 1.03)]
In [9]:
get_reviews('Martin Scorsese','Joe Pesci')
Out[9]:
[(5.0, 4.89), (4.87, 5.0), (4.0, 4.87)]

Euclidean Distance Formula for Calculating similarity

$d(x,y)=\sqrt{(x1-y1)^2 + (x2-y1)^2 + (xn-yn)^2}$

In [10]:
# Function to get Euclidean Distance b/w 2 points 
def euclidean_distance(points):
    squared_diffs = [(point[0] - point[1]) ** 2 for point in points]
    summed_squared_diffs = sum(squared_diffs)
    distance = math.sqrt(summed_squared_diffs)
    return distance
In [11]:
# Function to  calculate similarity more similar less the distance and vice versa
# Added 1 for if highly similar can make the distance zero and give NotDefined Error
def similarity(reviews):
    return 1/ (1 + euclidean_distance(reviews))
In [12]:
# Function to get similarity b/w 2 users
def get_critic_similarity(criticA, criticB):
    reviews = get_reviews(criticA,criticB)
    return similarity(reviews)
In [13]:
get_critic_similarity('Marlon Brando','Robert DeNiro')
Out[13]:
0.341296928327645
In [14]:
get_critic_similarity('Steven Spielberg','Tom Hanks')
Out[14]:
0.4478352722730117
In [15]:
get_critic_similarity('Martin Scorsese','Joe Pesci')
Out[15]:
0.5300793497254199
In [16]:
# Function to give recommendation to users based on their reviews.
def recommend_movies(critic, num_suggestions):
    similarity_scores = [(get_critic_similarity(critic, other), other) for other in review if other != critic]
    # Get similarity Scores for all the critics
    similarity_scores.sort() 
    similarity_scores.reverse()
    similarity_scores = similarity_scores[0:num_suggestions]

    recommendations = {}
    # Dictionary to store recommendations
    for similarity, other in similarity_scores:
        reviewed = review[other]
        # Storing the review
        for movie in reviewed:
            if movie not in review[critic]:
                weight = similarity * reviewed[movie]
                # Weighing similarity with review
                if movie in recommendations:
                    sim, weights = recommendations[movie]
                    recommendations[movie] = (sim + similarity, weights + [weight])
                    # Similarity of movie along with weight
                else:
                    recommendations[movie] = (similarity, [weight])
                    

    for recommendation in recommendations:
        similarity, movie = recommendations[recommendation]
        recommendations[recommendation] = sum(movie) / similarity
        # Normalizing weights with similarity

    sorted_recommendations = sorted(recommendations.items(), key=operator.itemgetter(1), reverse=True)
    #Sorting recommendations with weight
    return sorted_recommendations
In [17]:
recommend_movies('Marlon Brando',4)
Out[17]:
[('Goodfellas', 5.000000000000001),
 ('Raiders of the Lost Ark', 5.0),
 ('Raging Bull', 4.89),
 ('Star Wars Episode IV - A New Hope', 3.8157055214723923),
 ('One Flew Over The Cuckoos Nest', 2.02)]
In [18]:
recommend_movies('Robert DeNiro',4)
Out[18]:
[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('Close Encounters of the Third Kind', 1.2744773851327365)]
In [19]:
recommend_movies('Steven Spielberg',4)
Out[19]:
[('The Shawshank Redemption', 4.928285762244913),
 ('The Green Mile', 4.87),
 ('The Shining', 4.71304734727882),
 ('Apocalypse Now', 1.63)]
In [20]:
recommend_movies('Tom Hanks',4)
Out[20]:
[('Raiders of the Lost Ark', 5.0),
 ('Jaws', 5.0),
 ('Close Encounters of the Third Kind', 5.0),
 ('The Shining', 4.93),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('The Shawshank Redemption', 4.89)]
In [21]:
recommend_movies('Martin Scorsese',4)
Out[21]:
[('Raiders of the Lost Ark', 5.0),
 ('Star Wars Episode IV - A New Hope', 4.92),
 ('The Godfather Part II', 4.3613513513513515),
 ('Apocalypse Now', 4.26)]
In [22]:
recommend_movies('Joe Pesci',4)
Out[22]:
[('Apocalypse Now', 5.000000000000001),
 ('The Godfather Part II', 4.7280538302277435),
 ('One Flew Over The Cuckoos Nest', 2.02),
 ('Close Encounters of the Third Kind', 1.14),
 ('Jaws', 1.12)]