import pandas as pd
import numpy as np
import time
import sqlite3
# Directory prefix prepended to every data file path used below.
data_home = './'

# Load only the first 10,000 rows of the tab-separated triplet file for a
# quick look; the file has no header row, so column names are supplied here.
triplet_dataset = pd.read_csv(
    data_home + 'train_triplets.txt',
    sep='\t',
    header=None,
    names=['user', 'song', 'play_count'],
    nrows=10000,
)
triplet_dataset.head(10)
user | song | play_count | |
---|---|---|---|
0 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOAKIMP12A8C130995 | 1 |
1 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOAPDEY12A81C210A9 | 1 |
2 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBBMDR12A8C13253B | 2 |
3 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBFNSP12AF72A0E22 | 1 |
4 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBFOVM12A58A7D494 | 1 |
5 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBNZDC12A6D4FC103 | 1 |
6 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBSUJE12A6D4F8CF5 | 2 |
7 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBVFZR12A6D4F8AE3 | 1 |
8 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBXALG12A8C13C108 | 1 |
9 | b80344d063b5ccb3212f76538f3d9e43d87dca9e | SOBXHDL12A81C204C0 | 1 |
# Total play count per user, accumulated in a single streaming pass over the
# triplet file so the whole file never has to be held in memory.
output_dict = {}
with open(data_home+'train_triplets.txt') as f:
    for line in f:
        # Split each line once; field 0 is the user and field 2 the play count.
        fields = line.split('\t')
        user = fields[0]
        # dict.get supplies 0 for a user seen for the first time, so a single
        # assignment covers both the "new" and the "already present" case.
        output_dict[user] = output_dict.get(user, 0) + int(fields[2])

# One record per user, ordered from the highest total to the lowest, then
# persisted so later steps can reload the totals without re-reading the file.
output_list = [{'user':k,'play_count':v} for k,v in output_dict.items()]
play_count_df = pd.DataFrame(output_list)
play_count_df = play_count_df.sort_values(by = 'play_count', ascending = False)
play_count_df.to_csv(path_or_buf='user_playcount_df.csv', index = False)
# Total play count per song, accumulated in a single streaming pass over the
# triplet file so the whole file never has to be held in memory.
output_dict = {}
with open(data_home+'train_triplets.txt') as f:
    for line in f:
        # Split each line once; field 1 is the song and field 2 the play count.
        fields = line.split('\t')
        song = fields[1]
        # dict.get supplies 0 for a song seen for the first time, so a single
        # assignment covers both the "new" and the "already present" case.
        output_dict[song] = output_dict.get(song, 0) + int(fields[2])

# One record per song, ordered from the highest total to the lowest, then
# persisted so later steps can reload the totals without re-reading the file.
output_list = [{'song':k,'play_count':v} for k,v in output_dict.items()]
song_count_df = pd.DataFrame(output_list)
song_count_df = song_count_df.sort_values(by = 'play_count', ascending = False)
song_count_df.to_csv(path_or_buf='song_playcount_df.csv', index = False)
# Reload the per-user totals from the CSV file and show the first ten rows.
play_count_df = pd.read_csv('user_playcount_df.csv')
play_count_df.head(10)
play_count | user | |
---|---|---|
0 | 13132 | 093cb74eb3c517c5179ae24caf0ebec51b24d2a2 |
1 | 9884 | 119b7c88d58d0c6eb051365c103da5caf817bea6 |
2 | 8210 | 3fa44653315697f42410a30cb766a4eb102080bb |
3 | 7015 | a2679496cd0af9779a92a13ff7c6af5c81ea8c7b |
4 | 6494 | d7d2d888ae04d16e994d6964214a1de81392ee04 |
5 | 6472 | 4ae01afa8f2430ea0704d502bc7b57fb52164882 |
6 | 6150 | b7c24f770be6b802805ac0e2106624a517643c17 |
7 | 5656 | 113255a012b2affeab62607563d03fbdf31b08e7 |
8 | 5620 | 6d625c6557df84b60d90426c0116138b617b9449 |
9 | 5602 | 99ac3d883681e21ea68071019dba828ce76fe94d |
# Reload the per-song totals from the CSV file and show the first ten rows.
song_count_df = pd.read_csv('song_playcount_df.csv')
song_count_df.head(n=10)
play_count | song | |
---|---|---|
0 | 726885 | SOBONKR12A58A7A7E0 |
1 | 648239 | SOAUWYT12A81C206F1 |
2 | 527893 | SOSXLTC12AF72A7F54 |
3 | 425463 | SOFRQTD12A81C233C0 |
4 | 389880 | SOEGIYH12A6D4FC0E3 |
5 | 356533 | SOAXGDH12A8C13F8A1 |
6 | 292642 | SONYKOW12AB01849C9 |
7 | 274627 | SOPUCYA12A8C13A694 |
8 | 268353 | SOUFTBI12AB0183F65 |
9 | 244730 | SOVDSJC12A58A7A271 |
# Grand total of plays over every song. Series.sum() adds the column in
# vectorised code instead of iterating it element by element in Python.
total_play_count = song_count_df.play_count.sum()
# Percentage of all plays accounted for by the first 100,000 rows of
# play_count_df.
(float(play_count_df.head(n=100000).play_count.sum())/total_play_count)*100
# Keep those first 100,000 user rows as the user subset for later filtering.
play_count_subset = play_count_df.head(n=100000)
40.8807280500655
# Percentage of total_play_count accounted for by the first 30,000 rows of
# song_count_df.
(float(song_count_df.head(n=30000).play_count.sum())/total_play_count)*100
78.39315366645269
# Keep the first 30,000 song rows, then materialise the user and song
# identifiers of both subsets as plain lists for the isin() filters below.
song_count_subset = song_count_df.head(30000)
user_subset = play_count_subset['user'].tolist()
song_subset = song_count_subset['song'].tolist()
# Load the complete triplet file (no header row, tab separated).
triplet_dataset = pd.read_csv(
    data_home + 'train_triplets.txt',
    sep='\t',
    header=None,
    names=['user', 'song', 'play_count'],
)

# First keep only rows whose user is in user_subset; the full frame is
# released as soon as the filtered one exists.
triplet_dataset_sub = triplet_dataset[triplet_dataset['user'].isin(user_subset)]
del triplet_dataset

# Then keep only rows whose song is in song_subset, again releasing the
# intermediate frame.
triplet_dataset_sub_song = triplet_dataset_sub[triplet_dataset_sub['song'].isin(song_subset)]
del triplet_dataset_sub

# Persist the filtered triplets and report their dimensions.
triplet_dataset_sub_song.to_csv(data_home + 'triplet_dataset_sub_song.csv', index=False)
triplet_dataset_sub_song.shape
(10774785, 3)
# Show the first ten rows of the filtered triplets.
triplet_dataset_sub_song.head(n=10)
user | song | play_count | |
---|---|---|---|
498 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOADQPP12A67020C82 | 12 |
499 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAFTRR12AF72A8D4D | 1 |
500 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOANQFY12AB0183239 | 1 |
501 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAYATB12A6701FD50 | 1 |
502 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBOAFP12A8C131F36 | 7 |
503 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBONKR12A58A7A7E0 | 26 |
504 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBZZDU12A6310D8A3 | 7 |
505 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOCAHRT12A8C13A1A4 | 5 |
506 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SODASIJ12A6D4F5D89 | 1 |
507 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SODEAWL12AB0187032 | 8 |
# Open the track metadata SQLite database and list the tables it contains.
conn = sqlite3.connect(data_home + 'track_metadata.db')
cur = conn.cursor()
# Cursor.execute returns the cursor itself, so the fetch can be chained.
cur.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
[('songs',)]
# Preview the first five rows of the songs table straight from the database.
# track_metadata_df is only assigned by the pd.read_sql call further down, so
# calling .head() on it here raises NameError when the file runs top to bottom.
pd.read_sql(con=conn, sql='select * from songs limit 5')
track_id | title | song_id | release | artist_id | artist_mbid | artist_name | duration | artist_familiarity | artist_hotttnesss | year | track_7digitalid | shs_perf | shs_work | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | TRMMMYQ128F932D901 | Silent Night | SOQMMHC12AB0180CB8 | Monster Ballads X-Mas | ARYZTJS1187B98C555 | 357ff05d-848a-44cf-b608-cb34b5701ae5 | Faster Pussy cat | 252.05506 | 0.649822 | 0.394032 | 2003 | 7032331 | -1 | 0 |
1 | TRMMMKD128F425225D | Tanssi vaan | SOVFVAK12A8C1350D9 | Karkuteillä | ARMVN3U1187FB3A1EB | 8d7ef530-a6fd-4f8f-b2e2-74aec765e0f9 | Karkkiautomaatti | 156.55138 | 0.439604 | 0.356992 | 1995 | 1514808 | -1 | 0 |
2 | TRMMMRX128F93187D9 | No One Could Ever | SOGTUKN12AB017F4F1 | Butter | ARGEKB01187FB50750 | 3d403d44-36ce-465c-ad43-ae877e65adc4 | Hudson Mohawke | 138.97098 | 0.643681 | 0.437504 | 2006 | 6945353 | -1 | 0 |
3 | TRMMMCH128F425532C | Si Vos Querés | SOBNYVR12A8C13558C | De Culo | ARNWYLR1187B9B2F9C | 12be7648-7094-495f-90e6-df4189d68615 | Yerba Brava | 145.05751 | 0.448501 | 0.372349 | 2003 | 2168257 | -1 | 0 |
4 | TRMMMWA128F426B589 | Tangle Of Aspens | SOHSBXH12A8C13B0DF | Rene Ablaze Presents Winter Sessions | AREQDTE1269FB37231 | Der Mystic | 514.29832 | 0.000000 | 0.000000 | 0 | 2264873 | -1 | 0 |
# Pull the whole songs table into a DataFrame.
track_metadata_df = pd.read_sql(sql='select * from songs', con=conn)

# Keep only metadata rows whose song_id is in song_subset, save them, and
# report the dimensions of the result.
in_song_subset = track_metadata_df['song_id'].isin(song_subset)
track_metadata_df_sub = track_metadata_df[in_song_subset]
track_metadata_df_sub.to_csv(data_home + 'track_metadata_df_sub.csv', index=False)
track_metadata_df_sub.shape
(30447, 14)
# Reload the filtered triplets and metadata from disk. The triplet file name
# must match the one used when the subset was saved
# ('triplet_dataset_sub_song.csv'); the previous name
# 'train_triplets_sub_song.csv' is never written anywhere in this file.
triplet_dataset_sub_song = pd.read_csv(filepath_or_buffer=data_home+'triplet_dataset_sub_song.csv')
track_metadata_df_sub = pd.read_csv(filepath_or_buffer=data_home+'track_metadata_df_sub.csv')

# Drop the two identifier columns that are not needed, then keep a single
# metadata row per song_id so the left join below cannot multiply triplet rows.
track_metadata_df_sub = track_metadata_df_sub.drop(['track_id', 'artist_mbid'], axis=1)
track_metadata_df_sub = track_metadata_df_sub.drop_duplicates(['song_id'])

# Attach metadata to every triplet; a left join keeps triplets that have no
# matching metadata row.
triplet_dataset_sub_song_merged = pd.merge(triplet_dataset_sub_song, track_metadata_df_sub, how='left', left_on='song', right_on='song_id')
triplet_dataset_sub_song_merged.rename(columns={'play_count':'listen_count'},inplace=True)

# Remove the join key duplicate and the metadata columns that are not used
# afterwards, leaving user, song, listen_count, title, release, artist_name
# and year.
triplet_dataset_sub_song_merged = triplet_dataset_sub_song_merged.drop(
    [
        'song_id',
        'artist_id',
        'duration',
        'artist_familiarity',
        'artist_hotttnesss',
        'track_7digitalid',
        'shs_perf',
        'shs_work',
    ],
    axis=1,
)
triplet_dataset_sub_song_merged.head(n=10)
user | song | listen_count | title | release | artist_name | year | |
---|---|---|---|---|---|---|---|
0 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOADQPP12A67020C82 | 12 | You And Me Jesus | Tribute To Jake Hess | Jake Hess | 2004 |
1 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAFTRR12AF72A8D4D | 1 | Harder Better Faster Stronger | Discovery | Daft Punk | 2007 |
2 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOANQFY12AB0183239 | 1 | Uprising | Uprising | Muse | 0 |
3 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAYATB12A6701FD50 | 1 | Breakfast At Tiffany's | Home | Deep Blue Something | 1993 |
4 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBOAFP12A8C131F36 | 7 | Lucky (Album Version) | We Sing. We Dance. We Steal Things. | Jason Mraz & Colbie Caillat | 0 |
5 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBONKR12A58A7A7E0 | 26 | You're The One | If There Was A Way | Dwight Yoakam | 1990 |
6 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBZZDU12A6310D8A3 | 7 | Don't Dream It's Over | Recurring Dream_ Best Of Crowded House (Domest... | Crowded House | 1986 |
7 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOCAHRT12A8C13A1A4 | 5 | S.O.S. | SOS | Jonas Brothers | 2007 |
8 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SODASIJ12A6D4F5D89 | 1 | The Invisible Man | The Invisible Man | Michael Cretu | 1985 |
9 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SODEAWL12AB0187032 | 8 | American Idiot [feat. Green Day & The Cast Of ... | The Original Broadway Cast Recording 'American... | Green Day | 0 |
import matplotlib.pyplot as plt
import numpy as np

# Reset matplotlib's rc parameters to their defaults before plotting.
plt.rcdefaults()


def _top_listens(frame, column, top_n=20):
    """Sum listen_count per distinct value of column.

    Returns a pair (totals, top): totals has one row per value of column with
    its summed listen_count, and top holds the top_n rows of totals with the
    largest sums, in descending order.
    """
    totals = frame[[column, 'listen_count']].groupby(column).sum().reset_index()
    top = totals.sort_values('listen_count', ascending=False).head(n=top_n)
    return totals, top


def _plot_listen_bars(top, column, chart_title):
    """Draw a bar chart of listen_count, one bar per row of top."""
    positions = np.arange(len(top))
    plt.bar(positions, list(top['listen_count']), align='center', alpha=0.5)
    # Vertical tick labels keep long names from overlapping.
    plt.xticks(positions, list(top[column]), rotation='vertical')
    plt.ylabel('Item count')
    plt.title(chart_title)
    plt.show()


# Top 20 song titles by summed listen_count.
popular_songs, popular_songs_top_20 = _top_listens(triplet_dataset_sub_song_merged, 'title')
_plot_listen_bars(popular_songs_top_20, 'title', 'Most popular songs')

# Top 20 releases by summed listen_count.
popular_release, popular_release_top_20 = _top_listens(triplet_dataset_sub_song_merged, 'release')
_plot_listen_bars(popular_release_top_20, 'release', 'Most popular Release')

# Top 20 artists by summed listen_count.
popular_artist, popular_artist_top_20 = _top_listens(triplet_dataset_sub_song_merged, 'artist_name')
_plot_listen_bars(popular_artist_top_20, 'artist_name', 'Most popular Artists')
# Number of rows with a non-null title for each user, largest first.
titles_per_user = triplet_dataset_sub_song_merged[['user', 'title']].groupby('user').count()
user_song_count_distribution = titles_per_user.reset_index().sort_values(
    by='title', ascending=False)

# Summary statistics of those per-user counts.
user_song_count_distribution.title.describe()

# Histogram of the per-user counts using 50 bins.
x = user_song_count_distribution.title
n, bins, patches = plt.hist(x, 50, facecolor='green', alpha=0.75)
plt.xlabel('Play Counts')
plt.ylabel('Num of Users')
plt.title(r'$\mathrm{Histogram\ of\ User\ Play\ Count\ Distribution}\ $')
plt.grid(True)
plt.show()
import Recommenders
from sklearn.model_selection import train_test_split

# Split the merged triplets 60/40 into training and test rows; the fixed
# random_state makes the split repeatable.
triplet_dataset_sub_song_merged_set = triplet_dataset_sub_song_merged
train_data, test_data = train_test_split(
    triplet_dataset_sub_song_merged_set,
    test_size=0.40,
    random_state=0,
)
train_data.head()
user | song | listen_count | title | release | artist_name | year | |
---|---|---|---|---|---|---|---|
3031653 | e120d4418d07d81a8efdb5cb9ab4328c09b28a95 | SONYKOW12AB01849C9 | 8 | Secrets | Waking Up | OneRepublic | 2009 |
7890358 | e9b13911d892a8ad58de6714b465aa5550fa3f61 | SOYFFTM12AF72AA2AE | 1 | Gone | Some Mad Hope | Matt Nathanson | 2007 |
6291377 | 359f2acd6a2da352521531d17de290573b0f04ca | SOOAFDW12A8C13325B | 1 | Bryn (Album) | Vampire Weekend | Vampire Weekend | 2007 |
3124329 | 43259fdd657ced6341184945608c1bd5939bdb95 | SOHFVJR12AF72A9805 | 1 | Holdin' On Together | Alphabetical | Phoenix | 0 |
2359286 | b7ef66e5c94b0f03943f7352be7271c3b6a54908 | SOWORSV12A6D4F9345 | 4 | Mr Blue | Hurricane Glass | Catherine Feeny | 2007 |
def create_popularity_recommendation(train_data, user_id, item_id, top_n=20):
    """Rank items by how many interaction rows they have and return the top ones.

    Args:
        train_data: DataFrame with one row per (user, item) interaction.
        user_id: Name of the column whose non-null entries are counted per item.
        item_id: Name of the column identifying the item being ranked.
        top_n: Number of top-ranked rows to return. Defaults to 20, the size
            this function has always returned.

    Returns:
        DataFrame with the columns item_id, 'score' and 'Rank', ordered by
        descending score; items with equal scores are ordered by ascending
        item_id and receive consecutive ranks in that order.
    """
    # Recommendation score = count of user_id entries for each unique item.
    train_data_grouped = (
        train_data.groupby([item_id])
        .agg({user_id: 'count'})
        .reset_index()
        .rename(columns={user_id: 'score'})
    )

    # Highest score first; item_id breaks ties so the order is deterministic.
    train_data_sort = train_data_grouped.sort_values(
        ['score', item_id], ascending=[False, True])

    # method='first' ranks tied scores in their (already sorted) row order.
    train_data_sort['Rank'] = train_data_sort['score'].rank(
        ascending=False, method='first')

    # Return the top_n recommendations.
    return train_data_sort.head(top_n)
# Popularity ranking of titles computed over the full merged triplet frame.
recommendations = create_popularity_recommendation(
    triplet_dataset_sub_song_merged,
    user_id='user',
    item_id='title',
)
recommendations
title | score | Rank | |
---|---|---|---|
19580 | Sehr kosmisch | 18629 | 1.0 |
5780 | Dog Days Are Over (Radio Edit) | 17636 | 2.0 |
27314 | You're The One | 16082 | 3.0 |
19542 | Secrets | 15139 | 4.0 |
18636 | Revelry | 14942 | 5.0 |
25070 | Undo | 14682 | 6.0 |
7531 | Fireflies | 13087 | 7.0 |
9641 | Hey_ Soul Sister | 12991 | 8.0 |
25216 | Use Somebody | 12790 | 9.0 |
9922 | Horn Concerto No. 4 in E flat K495: II. Romanc... | 12343 | 10.0 |
24291 | Tive Sim | 11825 | 11.0 |
3629 | Canada | 11591 | 12.0 |
23468 | The Scientist | 11534 | 13.0 |
4194 | Clocks | 11358 | 14.0 |
12136 | Just Dance | 11056 | 15.0 |
26974 | Yellow | 10923 | 16.0 |
16438 | OMG | 10824 | 17.0 |
9845 | Home | 10511 | 18.0 |
3296 | Bulletproof | 10382 | 19.0 |
4760 | Creep (Explicit) | 10246 | 20.0 |
# Narrow the song subset to the first 5,000 rows of song_count_df and rebuild
# the identifier lists used for filtering.
song_count_subset = song_count_df.head(5000)
user_subset = play_count_subset['user'].tolist()
song_subset = song_count_subset['song'].tolist()

# Keep only merged rows whose song belongs to the narrowed subset.
in_small_song_set = triplet_dataset_sub_song_merged['song'].isin(song_subset)
triplet_dataset_sub_song_merged_sub = triplet_dataset_sub_song_merged[in_small_song_set]
triplet_dataset_sub_song_merged_sub.head()
user | song | listen_count | title | release | artist_name | year | |
---|---|---|---|---|---|---|---|
6689 | 0d0f80a34807aab31a3521424d456d30bf2c93d9 | SOAEHEX12A8C13EFA4 | 21 | Dead And Gone [feat. Justin Timberlake] (Expli... | Paper Trail | T.I. | 2008 |
6690 | 0d0f80a34807aab31a3521424d456d30bf2c93d9 | SOCKJVP12A6D4F920C | 3 | Epic | The Real Thing | Faith No More | 1989 |
6692 | 0d0f80a34807aab31a3521424d456d30bf2c93d9 | SOCZKYZ12A8C13C748 | 1 | Walking In The Air | Oceanborn | Nightwish | 1998 |
6697 | 0d0f80a34807aab31a3521424d456d30bf2c93d9 | SOZAFNE12AAF3B50E6 | 12 | You Raise Me Up | Holidays & Hits | Celtic Woman | 2005 |
10680 | 4e11f45d732f4861772b2906f81a7d384552ad12 | SOABJBU12A8C13F63F | 2 | Back Against The Wall | Cage The Elephant | Cage The Elephant | 2008 |
# 70/30 split of the 5,000-song subset with a fixed seed.
train_data, test_data = train_test_split(
    triplet_dataset_sub_song_merged_sub,
    test_size=0.30,
    random_state=0,
)

# Build the item-similarity recommender on the training rows, using 'user' as
# the user column and 'title' as the item column.
is_model = Recommenders.item_similarity_recommender_py()
is_model.create(train_data, 'user', 'title')

# Pick the user found in the eighth training row and fetch that user's items.
user_id = train_data['user'].iloc[7]
user_items = is_model.get_user_items(user_id)

# Recommend songs for the user using the personalized model.
is_model.recommend(user_id)
---------------------------------------------------------------------- Recommendation process going on: ---------------------------------------------------------------------- No. of unique songs for the user: 186 no. of unique songs in the training set: 4833 Non zero values in cooccurence_matrix :315344
user_id | song | score | rank | |
---|---|---|---|---|
0 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Meteor | 0.099810 | 1 |
1 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Coda | 0.093718 | 2 |
2 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Tuesday Moon | 0.084476 | 3 |
3 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Tron | 0.083682 | 4 |
4 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Acadian Coast | 0.081797 | 5 |
5 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Love Letter To Japan | 0.081042 | 6 |
6 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Heavy Water | 0.080742 | 7 |
7 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Balloons (Single version) | 0.079459 | 8 |
8 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Blackbirds | 0.078346 | 9 |
9 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | Diamond Dave | 0.076680 | 10 |
# Total listen_count per user, under the column name total_listen_count.
per_user_listens = triplet_dataset_sub_song_merged[['user', 'listen_count']].groupby('user').sum()
triplet_dataset_sub_song_merged_sum_df = per_user_listens.reset_index()
triplet_dataset_sub_song_merged_sum_df.rename(
    columns={'listen_count': 'total_listen_count'}, inplace=True)

# Join the totals back onto every row (the frames share only the 'user'
# column) and express each row's listen_count as a fraction of its user's total.
triplet_dataset_sub_song_merged = triplet_dataset_sub_song_merged.merge(
    triplet_dataset_sub_song_merged_sum_df)
triplet_dataset_sub_song_merged['fractional_play_count'] = (
    triplet_dataset_sub_song_merged['listen_count']
    / triplet_dataset_sub_song_merged['total_listen_count']
)

# Spot-check the result for one user.
is_sample_user = triplet_dataset_sub_song_merged.user == 'd6589314c0a9bcbca4fee0c93b14bc402363afea'
triplet_dataset_sub_song_merged[is_sample_user][['user', 'song', 'listen_count', 'fractional_play_count']].head()
user | song | listen_count | fractional_play_count | |
---|---|---|---|---|
0 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOADQPP12A67020C82 | 12 | 0.036474 |
1 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAFTRR12AF72A8D4D | 1 | 0.003040 |
2 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOANQFY12AB0183239 | 1 | 0.003040 |
3 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOAYATB12A6701FD50 | 1 | 0.003040 |
4 | d6589314c0a9bcbca4fee0c93b14bc402363afea | SOBOAFP12A8C131F36 | 7 | 0.021277 |
from scipy.sparse import coo_matrix
small_set = triplet_dataset_sub_song_merged

# Give every distinct user a dense integer code (us_index_value); user_index
# keeps the row label at which that user first appears in small_set.
user_codes = small_set['user'].drop_duplicates().reset_index()
user_codes.rename(columns={'index': 'user_index'}, inplace=True)

# Same for songs: so_index_value is the dense code, song_index the row label
# of the first appearance.
song_codes = small_set['song'].drop_duplicates().reset_index()
song_codes.rename(columns={'index': 'song_index'}, inplace=True)

song_codes['so_index_value'] = list(song_codes.index)
user_codes['us_index_value'] = list(user_codes.index)

# Attach both codes to every row; each merge joins on the one column the two
# frames have in common ('song', then 'user').
small_set = small_set.merge(song_codes, how='left')
small_set = small_set.merge(user_codes, how='left')

# Build the user x song matrix of fractional play counts in COO format.
mat_candidate = small_set[['us_index_value', 'so_index_value', 'fractional_play_count']]
data_array = mat_candidate['fractional_play_count'].values
row_array = mat_candidate['us_index_value'].values
col_array = mat_candidate['so_index_value'].values
data_sparse = coo_matrix((data_array, (row_array, col_array)), dtype=float)
data_sparse
<99996x30000 sparse matrix of type '<class 'numpy.float64'>' with 10774785 stored elements in COOrdinate format>
# Look up the user_codes row (and therefore the us_index_value) of one
# specific user.
user_codes[user_codes.user =='2a2f776cbac6df64d6cb505e7e834e01684673b6']
user_index | user | us_index_value | |
---|---|---|---|
27513 | 2981069 | 2a2f776cbac6df64d6cb505e7e834e01684673b6 | 27513 |
import math as mt
from scipy.sparse.linalg import * #used for matrix multiplication
from scipy.sparse.linalg import svds
from scipy.sparse import csc_matrix
def compute_svd(urm, K):
    """Factorise urm with a truncated SVD keeping K singular values.

    Returns:
        (U, S, Vt) as float32 CSC sparse matrices. S is a K x K diagonal
        matrix whose diagonal holds the square roots of the singular values
        (not the singular values themselves).
    """
    left_vectors, singular_values, right_vectors_t = svds(urm, K)

    # Diagonal matrix of sqrt(singular value), stored as float32.
    root_values = np.array([mt.sqrt(value) for value in singular_values], dtype=np.float32)
    root_diagonal = np.diag(root_values)

    return (
        csc_matrix(left_vectors, dtype=np.float32),
        csc_matrix(root_diagonal, dtype=np.float32),
        csc_matrix(right_vectors_t, dtype=np.float32),
    )
def compute_estimated_matrix(urm, U, S, Vt, uTest, K, test):
    """Return the best-scoring item indices for each user listed in uTest.

    Each user's estimated scores are the product U[user, :] . S . Vt; item
    indices are ordered from the highest score to the lowest.

    Args:
        urm: User x item matrix; only its shape is used, to size the result.
        U, S, Vt: Sparse factor matrices as returned by compute_svd.
        uTest: Iterable of user row indices to produce recommendations for.
        K: Unused; kept so existing calls keep working.
        test: Unused; kept so existing calls keep working.

    Returns:
        Integer array of shape (number of users, max_recommendation) where
        max_recommendation is 250, or the number of items if that is smaller.
        Row u holds the ranked item indices for user u when u is in uTest and
        zeros otherwise.
    """
    n_users, n_items = urm.shape
    # Never request more recommendations than there are items.
    max_recommendation = min(250, n_items)
    rightTerm = S.dot(Vt)

    # Item indices must be stored in an integer dtype: float16 cannot
    # represent integers above 2048 exactly, so indices kept in a float16
    # array are silently rounded to different items.
    recomendRatings = np.zeros((n_users, max_recommendation), dtype=np.int32)
    for userTest in uTest:
        # Only this user's score row is materialised, so no dense
        # users x items array is ever allocated.
        scores = U[userTest, :].dot(rightTerm).toarray().ravel()
        # A stable sort gives equal scores a deterministic order.
        ranked_items = np.argsort(-scores, kind='stable')
        recomendRatings[userTest, :] = ranked_items[:max_recommendation]
    return recomendRatings
# Number of singular values kept by the truncated SVD.
K = 50

# The user x song matrix to factorise, and its dimensions.
urm = data_sparse
MAX_UID, MAX_PID = urm.shape[0], urm.shape[1]

U, S, Vt = compute_svd(urm, K)

# Row indices of the users to generate recommendations for.
uTest = [4, 5, 6, 7, 8, 873, 23]
uTest_recommended_items = compute_estimated_matrix(urm, U, S, Vt, uTest, K, True)
Predictied ratings:
# Map every so_index_value to the title and artist of its first row in
# small_set. Building this once avoids rescanning all of small_set for each
# recommended song.
song_lookup = small_set.drop_duplicates('so_index_value').set_index('so_index_value')[['title','artist_name']]
for user in uTest:
    print("Recommendation for user with user id {}". format(user))
    # Print the ten best-ranked songs for this user, numbered from 1.
    for rank_value, i in enumerate(uTest_recommended_items[user,0:10], start=1):
        # int() so the lookup uses an integer label whatever dtype the
        # recommendation array has.
        song_details = song_lookup.loc[int(i)]
        print("The number {} recommended song is {} BY {}".format(rank_value, song_details['title'], song_details['artist_name']))
Recommendation for user with user id 4 The number 1 recommended song is Fireflies BY Charttraxx Karaoke The number 2 recommended song is Hey_ Soul Sister BY Train The number 3 recommended song is OMG BY Usher featuring will.i.am The number 4 recommended song is Lucky (Album Version) BY Jason Mraz & Colbie Caillat The number 5 recommended song is Vanilla Twilight BY Owl City The number 6 recommended song is Billionaire [feat. Bruno Mars] (Explicit Album Version) BY Travie McCoy The number 7 recommended song is Crumpshit BY Philippe Rochard The number 8 recommended song is Love Story BY Taylor Swift The number 9 recommended song is TULENLIEKKI BY M.A. Numminen The number 10 recommended song is Use Somebody BY Kings Of Leon Recommendation for user with user id 5 The number 1 recommended song is Sehr kosmisch BY Harmonia The number 2 recommended song is Dog Days Are Over (Radio Edit) BY Florence + The Machine The number 3 recommended song is Ain't Misbehavin BY Sam Cooke The number 4 recommended song is Revelry BY Kings Of Leon The number 5 recommended song is Undo BY Björk The number 6 recommended song is Cosmic Love BY Florence + The Machine The number 7 recommended song is Home BY Edward Sharpe & The Magnetic Zeros The number 8 recommended song is You've Got The Love BY Florence + The Machine The number 9 recommended song is Tighten Up BY The Black Keys The number 10 recommended song is Bring Me To Life BY Evanescence Recommendation for user with user id 6 The number 1 recommended song is Crumpshit BY Philippe Rochard The number 2 recommended song is Marry Me BY Train The number 3 recommended song is Hey_ Soul Sister BY Train The number 4 recommended song is Lucky (Album Version) BY Jason Mraz & Colbie Caillat The number 5 recommended song is One On One BY the bird and the bee The number 6 recommended song is Canada BY Five Iron Frenzy The number 7 recommended song is I Never Told You BY Colbie Caillat The number 8 recommended song is Fireflies BY Charttraxx Karaoke 
The number 9 recommended song is TULENLIEKKI BY M.A. Numminen The number 10 recommended song is Bring Me To Life BY Evanescence Recommendation for user with user id 7 The number 1 recommended song is Behind The Sea [Live In Chicago] BY Panic At The Disco The number 2 recommended song is The City Is At War (Album Version) BY Cobra Starship The number 3 recommended song is Una Confusion BY LU The number 4 recommended song is Dead Souls BY Nine Inch Nails The number 5 recommended song is Home BY Edward Sharpe & The Magnetic Zeros The number 6 recommended song is Climbing Up The Walls BY Radiohead The number 7 recommended song is Tighten Up BY The Black Keys The number 8 recommended song is West One (Shine On Me) BY The Ruts The number 9 recommended song is Tive Sim BY Cartola The number 10 recommended song is Cosmic Love BY Florence + The Machine Recommendation for user with user id 8 The number 1 recommended song is Undo BY Björk The number 2 recommended song is Canada BY Five Iron Frenzy The number 3 recommended song is Better To Reign In Hell BY Cradle Of Filth The number 4 recommended song is Unite (2009 Digital Remaster) BY Beastie Boys The number 5 recommended song is Behind The Sea [Live In Chicago] BY Panic At The Disco The number 6 recommended song is Rockin' Around The Christmas Tree BY Brenda Lee The number 7 recommended song is Tautou BY Brand New The number 8 recommended song is Revelry BY Kings Of Leon The number 9 recommended song is 16 Candles BY The Crests The number 10 recommended song is Catch You Baby (Steve Pitron & Max Sanna Radio Edit) BY Lonnie Gordon Recommendation for user with user id 873 The number 1 recommended song is The City Is At War (Album Version) BY Cobra Starship The number 2 recommended song is Una Confusion BY LU The number 3 recommended song is Dead Souls BY Nine Inch Nails The number 4 recommended song is Home BY Edward Sharpe & The Magnetic Zeros The number 5 recommended song is Tighten Up BY The Black Keys The number 6 
recommended song is Angel of Music BY Andrew Lloyd Webber The number 7 recommended song is Tive Sim BY Cartola The number 8 recommended song is What You Know BY Two Door Cinema Club The number 9 recommended song is Float On BY Modest Mouse The number 10 recommended song is The Funeral (Album Version) BY Band Of Horses Recommendation for user with user id 23 The number 1 recommended song is Garden Of Eden BY Guns N' Roses The number 2 recommended song is Don't Speak BY John Dahlbäck The number 3 recommended song is Master Of Puppets BY Metallica The number 4 recommended song is TULENLIEKKI BY M.A. Numminen The number 5 recommended song is Bring Me To Life BY Evanescence The number 6 recommended song is Kryptonite BY 3 Doors Down The number 7 recommended song is Make Her Say BY Kid Cudi / Kanye West / Common The number 8 recommended song is Night Village BY Deep Forest The number 9 recommended song is Better To Reign In Hell BY Cradle Of Filth The number 10 recommended song is Xanadu BY Olivia Newton-John;Electric Light Orchestra
# Repeat the recommendation step for a single user row index.
uTest = [27513]

# Get the recommended items for the test user.
print("Predictied ratings:")
uTest_recommended_items = compute_estimated_matrix(
    urm, U, S, Vt, uTest, K, True)
Predictied ratings:
# Map every so_index_value to the title and artist of its first row in
# small_set. Building this once avoids rescanning all of small_set for each
# recommended song.
song_lookup = small_set.drop_duplicates('so_index_value').set_index('so_index_value')[['title','artist_name']]
for user in uTest:
    print("Recommendation for user with user id {}". format(user))
    # Print the ten best-ranked songs for this user, numbered from 1.
    for rank_value, i in enumerate(uTest_recommended_items[user,0:10], start=1):
        # int() so the lookup uses an integer label whatever dtype the
        # recommendation array has.
        song_details = song_lookup.loc[int(i)]
        print("The number {} recommended song is {} BY {}".format(rank_value, song_details['title'], song_details['artist_name']))
Recommendation for user with user id 27513 The number 1 recommended song is Behind The Sea [Live In Chicago] BY Panic At The Disco The number 2 recommended song is Una Confusion BY LU The number 3 recommended song is Home BY Edward Sharpe & The Magnetic Zeros The number 4 recommended song is Dead Souls BY Nine Inch Nails The number 5 recommended song is The City Is At War (Album Version) BY Cobra Starship The number 6 recommended song is Tighten Up BY The Black Keys The number 7 recommended song is Climbing Up The Walls BY Radiohead The number 8 recommended song is Yellow BY Coldplay The number 9 recommended song is Creep (Explicit) BY Radiohead The number 10 recommended song is West One (Shine On Me) BY The Ruts