#!/usr/bin/env python
# coding: utf-8

# ## Importing Libraries and Loading Data

# In[1]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# In[2]:

# Reading movies file (only the two columns the recommenders use).
movies = pd.read_csv(
    'movies.csv', sep=',', encoding='latin-1', usecols=['title', 'genres']
)

# In[3]:

movies.head()

# In[4]:

# Break up the big pipe-separated genre string into a list of genres.
movies['genres'] = movies['genres'].str.split('|')
# Convert each list back to its string form (missing genres become "") so
# the column can be fed to the text vectorizer; the list punctuation is not
# matched by the vectorizer's default word tokens.
movies['genres'] = movies['genres'].fillna("").astype('str')


# # Recommendation based on Genre

# In[5]:

# min_df=1 keeps every term that occurs in at least one document. An integer
# min_df of 0 selects the same vocabulary but is rejected by scikit-learn
# versions that validate parameters.
tf = TfidfVectorizer(
    analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english'
)
tfidf_matrix = tf.fit_transform(movies['genres'])
tfidf_matrix.shape

# In[6]:

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Keep a dedicated reference: `cosine_sim` is rebound by the title section
# below, and the genre recommender must not silently switch matrices.
genre_cosine_sim = cosine_sim
cosine_sim[:4, :4]

# In[7]:

# Series of movie titles, plus a reverse lookup from title to row index.
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])


def _top_similar_titles(title, similarity, top_n=20):
    """Return the titles most similar to ``title`` under ``similarity``.

    Args:
        title: Exact movie title as it appears in ``movies['title']``.
        similarity: Square matrix whose row ``i`` holds the similarity of
            movie ``i`` to every movie.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series of titles ordered from most to least similar; the
        queried movie itself is never included.

    Raises:
        KeyError: If ``title`` is not present in the data set.
    """
    # A title that occurs several times yields several indices; use the
    # first occurrence so a single similarity row is always selected.
    idx = int(np.atleast_1d(indices[title])[0])
    scores = np.asarray(similarity[idx])
    # Stable descending sort: ties keep their original row order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query by index rather than by position: when other movies
    # tie with it (e.g. identical genres) it is not guaranteed to be first.
    order = order[order != idx][:top_n]
    return titles.iloc[order]


def genre_recommendations(title, top_n=20):
    """Return up to ``top_n`` titles whose genres are most similar to ``title``.

    Similarity is the cosine similarity of the TF-IDF genre vectors.

    Raises:
        KeyError: If ``title`` is not present in the data set.
    """
    return _top_similar_titles(title, genre_cosine_sim, top_n)


# In[8]:

# NOTE(review): the title must match an entry of movies.csv exactly
# (including the trailing space) or a KeyError is raised -- confirm.
genre_recommendations('Dark Knight ').head(20)


# # Recommendation based on Title

# In[9]:

tf = TfidfVectorizer(
    analyzer='word', ngram_range=(1, 2), min_df=1, stop_words='english'
)
tfidf_matrix = tf.fit_transform(movies['title'])
tfidf_matrix.shape

# In[10]:

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Dedicated reference for the title recommender (see the genre section).
title_cosine_sim = cosine_sim
cosine_sim[:4, :4]

# In[11]:

# Series of movie titles, plus a reverse lookup from title to row index.
titles = movies['title']
indices = pd.Series(movies.index, index=movies['title'])


def title_recommendations(title, top_n=20):
    """Return up to ``top_n`` titles whose wording is most similar to ``title``.

    Similarity is the cosine similarity of the TF-IDF title vectors.

    Raises:
        KeyError: If ``title`` is not present in the data set.
    """
    return _top_similar_titles(title, title_cosine_sim, top_n)


# In[12]:

title_recommendations('Dark Knight ').head(20)