#hide
#skip
# Collaborative-filtering walkthrough on the fastai `ML_100k` dataset:
# load ratings and titles, train a collab model, then inspect the learned
# per-movie biases and a 3-component PCA of the movie embedding weights.
#
# This is a notebook export. Bare expressions such as `ratings.head()` are
# cell outputs: they display in a notebook/REPL and are no-ops in a script.

# Notebook-only shell magic; kept as a comment so the file is valid Python.
# ! [ -e /content ] && pip install -Uqq fastai  # upgrade fastai on colab

# Imported explicitly: the script previously relied on the star imports
# below to bring `itemgetter` into scope.
from operator import itemgetter

from fastai.tabular.all import *
from fastai.collab import *

# all_slow

# --- Data -------------------------------------------------------------------
path = untar_data(URLs.ML_100k)

# Ratings file: tab-separated, no header row; only the first three columns
# are read and named user / movie / rating.
ratings = pd.read_csv(
    path/'u.data',
    delimiter='\t',
    header=None,
    usecols=(0, 1, 2),
    names=['user', 'movie', 'rating'],
)
ratings.head()

# Item file: pipe-separated, latin-1 encoded; only movie id and title are read.
movies = pd.read_csv(
    path/'u.item',
    delimiter='|',
    encoding='latin-1',
    usecols=(0, 1),
    names=('movie', 'title'),
    header=None,
)
movies.head()

# No explicit `on=`: the merge uses the columns both frames share
# (here the only common column name is 'movie').
ratings = ratings.merge(movies)
ratings.head()

# --- Model ------------------------------------------------------------------
# Items are identified by title rather than by numeric movie id.
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()

# NOTE(review): y_range upper bound of 5.5 is presumably chosen a little above
# the maximum rating so the top rating is reachable — confirm against the
# fastai collab documentation.
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, 5e-3, wd=0.1)

# --- Movie bias -------------------------------------------------------------
# The 1000 titles with the most ratings, most-rated first.
g = ratings.groupby('title')['rating'].count()
top_movies = g.sort_values(ascending=False).index.values[:1000]
top_movies[:10]

movie_bias = learn.model.bias(top_movies, is_item=True)
movie_bias.shape

# (bias, title, mean rating) triples so sorting by the first element ranks
# titles by learned bias while keeping the raw average alongside.
mean_ratings = ratings.groupby('title')['rating'].mean()
movie_ratings = [
    (b, i, mean_ratings.loc[i]) for i, b in zip(top_movies, movie_bias)
]

# Single shared sort key (first tuple element) used by every ranking below.
item0 = itemgetter(0)

sorted(movie_ratings, key=item0)[:15]                # lowest bias
sorted(movie_ratings, key=item0, reverse=True)[:15]  # highest bias

# --- Movie weights ----------------------------------------------------------
movie_w = learn.model.weight(top_movies, is_item=True)
movie_w.shape

# Reduce the embedding weights to 3 components, then transpose so each
# component can be unpacked as its own vector over the top movies.
movie_pca = movie_w.pca(3)
movie_pca.shape
fac0, fac1, fac2 = movie_pca.t()

# Titles at both extremes of the first component.
movie_comp = [(f, i) for f, i in zip(fac0, top_movies)]
sorted(movie_comp, key=item0, reverse=True)[:10]
sorted(movie_comp, key=item0)[:10]

# Titles at both extremes of the second component.
movie_comp = [(f, i) for f, i in zip(fac1, top_movies)]
sorted(movie_comp, key=item0, reverse=True)[:10]
sorted(movie_comp, key=item0)[:10]

# --- Plot -------------------------------------------------------------------
# Plot the first 50 entries of `top_movies`. The original also drew a random
# sample first and immediately overwrote it; that dead assignment is removed.
# To plot a random subset instead, use:
#   idxs = np.random.choice(len(top_movies), 50, replace=False)
idxs = list(range(50))

# First component on the x-axis, third component on the y-axis.
X = fac0[idxs]
Y = fac2[idxs]

plt.figure(figsize=(15, 15))
plt.scatter(X, Y)
for title, x, y in zip(top_movies[idxs], X, Y):
    # Random RGB scaled by 0.7 keeps every channel below 0.7 (darker labels).
    plt.text(x, y, title, color=np.random.rand(3)*0.7, fontsize=11)
plt.show()