In [1]:
from numpy import *
from numpy import linalg as la
import pandas as pd
import pdb
import numpy as np

In [2]:
def loadData():
    """Return the hard-coded example ratings as a 12 x 11 ``numpy.matrix``.

    Rows are users and columns are items. An entry of 0 means "not rated":
    that is how ``recommend`` and ``standEst`` interpret zeros.

    Returns:
        numpy.matrix: integer ratings. A matrix (not a plain ndarray) is
        returned because the rest of the notebook relies on matrix-only
        features such as ``.A``, ``.I`` and ``*`` as matrix multiplication.
    """
    M = [[0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5],
         [0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 3],
         [0, 0, 0, 0, 4, 0, 0, 1, 0, 4, 0],
         [3, 3, 4, 0, 0, 0, 0, 2, 2, 0, 0],
         [5, 4, 5, 0, 0, 0, 0, 5, 5, 0, 0],
         [0, 0, 0, 0, 5, 0, 1, 0, 0, 5, 0],
         [4, 3, 4, 0, 0, 0, 0, 5, 5, 0, 1],
         [0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 4],
         [0, 0, 0, 2, 0, 2, 5, 0, 0, 1, 2],
         [0, 0, 0, 0, 5, 0, 0, 0, 0, 4, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0],
         [2, 1, 0, 2, 0, 5, 3, 0, 1, 0, 1]]
    # np.asmatrix rather than the np.mat alias: the alias was removed from the
    # top-level numpy namespace in NumPy 2.0, while asmatrix works everywhere.
    return np.asmatrix(M)

In [3]:
def euclidSim(inA, inB):
    """Similarity derived from Euclidean distance.

    Identical inputs score 1.0 and the score decays towards 0 as the
    distance between ``inA`` and ``inB`` grows.
    """
    distance = la.norm(inA - inB)
    return 1.0 / (1.0 + distance)

def pearsonSim(inA, inB):
    """Pearson-correlation similarity rescaled from [-1, 1] to [0, 1].

    Args:
        inA, inB: equally sized column vectors (n x 1) of ratings.

    Returns:
        float: ``0.5 + 0.5 * r`` where ``r`` is the Pearson correlation.
        With fewer than three observations the function returns 1.0.
        When the correlation is undefined (at least one input has zero
        variance) the neutral value 0.5 is returned instead of NaN, so one
        constant column cannot turn a whole predicted rating into NaN.
    """
    if len(inA) < 3:
        return 1.0
    # A constant vector makes corrcoef divide by a zero standard deviation;
    # silence that warning here and handle the resulting NaN explicitly.
    with np.errstate(divide='ignore', invalid='ignore'):
        corr = np.corrcoef(inA, inB, rowvar=False)[0, 1]
    if np.isnan(corr):
        return 0.5
    return 0.5 + 0.5 * corr

def cosineSim(inA, inB):
    """Cosine similarity rescaled from [-1, 1] to [0, 1].

    Args:
        inA, inB: vectors of equal length. Column matrices (n x 1), as used
            throughout this notebook, and plain 1-D arrays are both accepted.

    Returns:
        float: ``0.5 + 0.5 * cos(angle between inA and inB)``. If either
        vector has zero norm the cosine is undefined and the neutral value
        0.5 is returned instead of NaN.
    """
    # Flatten to 1-D float arrays so the dot product is a true scalar; calling
    # float() on a 1x1 matrix is deprecated in recent NumPy releases.
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        return 0.5
    num = float(np.dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)

In [4]:
# Two sample vectors used to sanity-check the similarity functions below.
# They are built directly as 1 x 7 matrices with np.asmatrix; the bare `mat`
# alias from `from numpy import *` no longer exists in NumPy >= 2.0.
A = np.asmatrix([2, 3, 0, 1, 0, 4, -5])
B = np.asmatrix([0, 1, 2, -4, 2, 0, 3])

In [5]:
# Euclidean-distance similarity of the two sample vectors; .T turns each 1 x 7 row matrix into a column.
print(euclidSim(A.T,B.T))

0.08333333333333333

In [6]:
# Cosine similarity (rescaled to [0, 1]) of the same two sample vectors, passed as columns.
print(cosineSim(A.T,B.T))

0.3150010839748479

In [7]:
# Pearson-correlation similarity (rescaled to [0, 1]) of the same two sample vectors, passed as columns.
print(pearsonSim(A.T,B.T))

0.2665380020120951

In [8]:
def standEst(dataMat, user, simMeas, item):
    """Predict ``user``'s rating of ``item`` from the items they already rated.

    Every item the user has rated contributes its rating, weighted by the
    similarity between that item and ``item``. The similarity is measured
    only over the rows (users) that rated both items, and is 0 when no such
    rows exist.

    Args:
        dataMat: ratings matrix (rows = users, columns = items, 0 = unrated).
        user: row index of the user.
        simMeas: callable taking two column vectors and returning a similarity.
        item: column index of the item to score.

    Returns:
        The similarity-weighted average rating, or 0 when the total
        similarity is 0.
    """
    itemCount = np.shape(dataMat)[1]
    # Which users rated the target item; this does not depend on the loop variable.
    targetRated = dataMat[:, item] > 0
    weightSum = 0.0
    weightedRatings = 0.0
    for other in range(itemCount):
        rating = dataMat[user, other]
        if rating != 0:
            # Row indices of users who rated both the target item and `other`.
            coRaters = np.nonzero(np.logical_and(targetRated, dataMat[:, other] > 0))[0]
            if len(coRaters) == 0:
                weight = 0
            else:
                weight = simMeas(dataMat[coRaters, item], dataMat[coRaters, other])
            weightSum += weight
            weightedRatings += weight * rating
    if weightSum == 0:
        return 0
    return weightedRatings / weightSum

In [9]:
def svdEst(dataMat, user, simMeas, item, numFactors=4):
    """Predict ``user``'s rating of ``item`` using SVD-reduced item vectors.

    Items are projected into a low-dimensional space built from the leading
    singular vectors of the ratings matrix. Each item the user has rated then
    contributes its rating, weighted by its similarity to ``item`` in that
    reduced space.

    Args:
        dataMat: ratings matrix (rows = users, columns = items, 0 = unrated).
        user: row index of the user.
        simMeas: callable taking two column vectors and returning a similarity.
        item: column index of the item to score.
        numFactors: number of leading singular values to keep (default 4, the
            value that used to be hard-coded). It is clamped to the number of
            singular values available, so small matrices no longer raise a
            shape error.

    Returns:
        The similarity-weighted average rating, or 0 when the total
        similarity is 0.
    """
    # np.asmatrix rather than the bare `mat` alias, which is gone in NumPy >= 2.0.
    data = np.asmatrix(dataMat)
    n = np.shape(data)[1]
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, _ = la.svd(data)
    k = min(numFactors, len(Sigma))
    SigK = np.asmatrix(np.diag(Sigma[:k]))  # top-k singular values as a diagonal matrix
    xformedItems = data.T * U[:, :k] * SigK.I  # one row per item, k columns
    for j in range(n):
        userRating = data[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T, xformedItems[j, :].T)
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal

In [10]:
def recommend(dataMat, user, N=3, simMeas=cosineSim, estMethod=standEst):
    """Return the ``N`` best-scoring unrated items for ``user``.

    Args:
        dataMat: ratings matrix (rows = users, columns = items, 0 = unrated).
        user: row index of the user.
        N: maximum number of recommendations to return.
        simMeas: similarity function handed to ``estMethod``.
        estMethod: estimator called as ``estMethod(dataMat, user, simMeas, item)``.

    Returns:
        A list of ``(item, estimatedScore)`` tuples sorted by score, highest
        first, or the string ``'you rated everything'`` when the user has no
        unrated items.
    """
    # Column indices where this user's rating is 0, i.e. not yet rated.
    _, unratedItems = np.nonzero(dataMat[user, :].A == 0)
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(candidate, estMethod(dataMat, user, simMeas, candidate))
                  for candidate in unratedItems]
    itemScores.sort(key=lambda pair: pair[1], reverse=True)
    return itemScores[:N]

In [11]:
# Build the example ratings matrix once and show it; later cells use `data` as the
# rows-are-users, columns-are-items input to recommend().
data = loadData()
print(data)

[[0 0 0 0 0 4 0 0 0 0 5]
[0 0 0 3 0 4 0 0 0 0 3]
[0 0 0 0 4 0 0 1 0 4 0]
[3 3 4 0 0 0 0 2 2 0 0]
[5 4 5 0 0 0 0 5 5 0 0]
[0 0 0 0 5 0 1 0 0 5 0]
[4 3 4 0 0 0 0 5 5 0 1]
[0 0 0 4 0 4 0 0 0 0 4]
[0 0 0 2 0 2 5 0 0 1 2]
[0 0 0 0 5 0 0 0 0 4 0]
[1 0 0 0 0 0 0 1 2 0 0]
[2 1 0 2 0 5 3 0 1 0 1]]

In [12]:
# Project every item into the 4-dimensional space given by the four largest
# singular values (the same transform svdEst applies internally) and show it.
# np.asmatrix / np.eye are used explicitly: the bare `mat` alias from
# `from numpy import *` no longer exists in NumPy >= 2.0.
D = np.asmatrix(data)
U, Sigma, VT = la.svd(D)
Sig4 = np.asmatrix(np.eye(4) * Sigma[:4])  # arrange the top four singular values into a diagonal matrix
xItems = D.T * U[:, :4] * Sig4.I  # transformed items: one row per item, one column per factor
print(xItems)

[[-0.45889187  0.03170418 -0.01809311  0.11036907]
[-0.3622062   0.04692163 -0.01141864  0.04254964]
[-0.45537578  0.10423397 -0.00800224 -0.05403528]
[-0.051868   -0.39701598 -0.05950012  0.06753374]
[-0.01726089 -0.08392364  0.71965471 -0.13098077]
[-0.09964753 -0.67126432 -0.11207725 -0.04038616]
[-0.04619366 -0.25745027  0.05860349  0.87744841]
[-0.45397947  0.09523267  0.03757744 -0.09430203]
[-0.46909953  0.0672883  -0.0131357   0.00911101]
[-0.01955354 -0.10798751  0.67233514  0.01344801]
[-0.09629148 -0.52832652 -0.09176174 -0.42505074]]

In [13]:
# Top-4 recommendations for the user in row 4, scoring each unrated item with standEst
# (cosine similarity computed on the raw ratings of users who rated both items).
user = 4
recommendations = recommend(data, user, N=4, simMeas=cosineSim, estMethod=standEst)
print(recommendations)

[(4, 5.0), (9, 5.0), (10, 4.804196825932594), (3, 4.666666666666667)]

In [14]:
# Print each (item, predicted rating) pair in the order recommend() returned them.
print("Recommended Items for User", user, ":\n")
for item_idx, predicted in recommendations:
    print("Item ", item_idx, "with predicted rating: ", predicted, "\n")

Recommended Items for User 4 :

Item  4 with predicted rating:  5.0

Item  9 with predicted rating:  5.0

Item  10 with predicted rating:  4.804196825932594

Item  3 with predicted rating:  4.666666666666667


In [15]:
# Same request as before (top 4 for the user in row 4), but scoring unrated items with svdEst,
# which compares items in the SVD-reduced space instead of on raw co-rated ratings.
user = 4
recommendations = recommend(data, user, N=4, simMeas=cosineSim, estMethod=svdEst)
print(recommendations)

[(10, 4.808129974963378), (4, 4.80785278924504), (9, 4.803516888538971), (5, 4.79538416477759)]

In [16]:
# Print each (item, predicted rating) pair in the order recommend() returned them.
print("Recommended Items for User", user, ":\n")
for item_idx, predicted in recommendations:
    print("Item ", item_idx, "with predicted rating: ", predicted, "\n")

Recommended Items for User 4 :

Item  10 with predicted rating:  4.808129974963378

Item  4 with predicted rating:  4.80785278924504

Item  9 with predicted rating:  4.803516888538971

Item  5 with predicted rating:  4.79538416477759


In [ ]: