import graphlab
# Set the canvas target to "ipynb" so SFrames and SGraphs are shown inside
# the IPython notebook.
graphlab.canvas.set_target("ipynb")
# import matplotlib.pyplot as plt
# %matplotlib inline
# download data from: http://files.grouplens.org/datasets/movielens/ml-1m.zip
def _read_double_colon_file(path):
    """Load a '::'-separated MovieLens .dat file into an SFrame.

    The file is read with a newline delimiter and no header, so every line
    arrives as a single string in column 'X1'. Each line is then split on
    '::' and the resulting lists are unpacked into the columns
    'X.0', 'X.1', ... (all still strings at this point).
    """
    raw = graphlab.SFrame.read_csv(path, delimiter='\n', header=False)
    return raw['X1'].apply(lambda line: line.split('::')).unpack()


# Ratings: cast every unpacked column to int.
data = _read_double_colon_file('/Users/datalab/bigdata/cjc/ml-1m/ratings.dat')
for col in data.column_names():
    data[col] = data[col].astype(int)
data.rename({'X.0': 'user_id', 'X.1': 'movie_id', 'X.2': 'rating', 'X.3': 'timestamp'})
data.save('ratings')

# Users: only the id is cast to int; the remaining columns stay as strings.
users = _read_double_colon_file('/Users/datalab/bigdata/cjc/ml-1m/users.dat')
users.rename({'X.0': 'user_id', 'X.1': 'gender', 'X.2': 'age', 'X.3': 'occupation', 'X.4': 'zip-code'})
users['user_id'] = users['user_id'].astype(int)
users.save('users')

# Movies: cast the id to int so it has the same type as data['movie_id'].
items = _read_double_colon_file('/Users/datalab/bigdata/cjc/ml-1m/movies.dat')
items.rename({'X.0': 'movie_id', 'X.1': 'title', 'X.2': 'genre'})
items['movie_id'] = items['movie_id'].astype(int)
items.save('items')
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1525501840.log
This non-commercial license of GraphLab Create for academic use is assigned to wangchengjun@nju.edu.cn and will expire on March 14, 2019.
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
Parsing completed. Parsed 100 lines in 0.43242 secs.
------------------------------------------------------ Inferred types from first 100 line(s) of file as column_type_hints=[str] If parsing fails due to incorrect types, you can correct the inferred type list above and pass it to read_csv in the column_type_hints argument ------------------------------------------------------
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
Parsing completed. Parsed 1000209 lines in 0.560575 secs.
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
------------------------------------------------------
Parsing completed. Parsed 100 lines in 0.037251 secs.
Inferred types from first 100 line(s) of file as column_type_hints=[str] If parsing fails due to incorrect types, you can correct the inferred type list above and pass it to read_csv in the column_type_hints argument ------------------------------------------------------
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
Parsing completed. Parsed 6040 lines in 0.015785 secs.
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/movies.dat
Parsing completed. Parsed 100 lines in 0.033283 secs.
------------------------------------------------------ Inferred types from first 100 line(s) of file as column_type_hints=[str] If parsing fails due to incorrect types, you can correct the inferred type list above and pass it to read_csv in the column_type_hints argument ------------------------------------------------------
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/movies.dat
Parsing completed. Parsed 3883 lines in 0.016475 secs.
data
user_id | movie_id | rating | timestamp |
---|---|---|---|
1 | 1193 | 5 | 978300760 |
1 | 661 | 3 | 978302109 |
1 | 914 | 3 | 978301968 |
1 | 3408 | 4 | 978300275 |
1 | 2355 | 5 | 978824291 |
1 | 1197 | 3 | 978302268 |
1 | 1287 | 5 | 978302039 |
1 | 2804 | 5 | 978300719 |
1 | 594 | 4 | 978302268 |
1 | 919 | 4 | 978301368 |
items
movie_id | title | genre |
---|---|---|
1 | Toy Story (1995) | Animation|Children's|Come dy ... |
2 | Jumanji (1995) | Adventure|Children's|Fant asy ... |
3 | Grumpier Old Men (1995) | Comedy|Romance |
4 | Waiting to Exhale (1995) | Comedy|Drama |
5 | Father of the Bride Part II (1995) ... |
Comedy |
6 | Heat (1995) | Action|Crime|Thriller |
7 | Sabrina (1995) | Comedy|Romance |
8 | Tom and Huck (1995) | Adventure|Children's |
9 | Sudden Death (1995) | Action |
10 | GoldenEye (1995) | Action|Adventure|Thriller |
users
user_id | gender | age | occupation | zip-code |
---|---|---|---|---|
1 | F | 1 | 10 | 48067 |
2 | M | 56 | 16 | 70072 |
3 | M | 25 | 15 | 55117 |
4 | M | 45 | 7 | 02460 |
5 | M | 25 | 20 | 55455 |
6 | F | 50 | 9 | 55117 |
7 | M | 35 | 1 | 06810 |
8 | M | 25 | 12 | 11413 |
9 | M | 25 | 17 | 61614 |
10 | F | 35 | 1 | 95370 |
# Attach each movie's title and genre to its ratings by joining on movie_id.
data = data.join(items, on='movie_id')
data
user_id | movie_id | rating | timestamp | title | genre |
---|---|---|---|---|---|
1 | 1193 | 5 | 978300760 | One Flew Over the Cuckoo's Nest (1975) ... |
Drama |
1 | 661 | 3 | 978302109 | James and the Giant Peach (1996) ... |
Animation|Children's|Musi cal ... |
1 | 914 | 3 | 978301968 | My Fair Lady (1964) | Musical|Romance |
1 | 3408 | 4 | 978300275 | Erin Brockovich (2000) | Drama |
1 | 2355 | 5 | 978824291 | Bug's Life, A (1998) | Animation|Children's|Come dy ... |
1 | 1197 | 3 | 978302268 | Princess Bride, The (1987) ... |
Action|Adventure|Comedy|R omance ... |
1 | 1287 | 5 | 978302039 | Ben-Hur (1959) | Action|Adventure|Drama |
1 | 2804 | 5 | 978300719 | Christmas Story, A (1983) | Comedy|Drama |
1 | 594 | 4 | 978302268 | Snow White and the Seven Dwarfs (1937) ... |
Animation|Children's|Musi cal ... |
1 | 919 | 4 | 978301368 | Wizard of Oz, The (1939) | Adventure|Children's|Dram a|Musical ... |
# Randomly split the joined ratings into train/test with fraction 0.95;
# the seed is fixed so the split is repeatable.
train_set, test_set = data.random_split(0.95, seed=1)
# No model type is named here; the training log reports which recommender
# class graphlab.recommender.create selected.
m = graphlab.recommender.create(train_set, 'user_id', 'movie_id', 'rating')
Recsys training: model = ranking_factorization_recommender
Preparing data set.
Data has 949852 observations with 6040 users and 3701 items.
Data prepared in: 1.29085s
Training ranking_factorization_recommender for recommendations.
+--------------------------------+--------------------------------------------------+----------+
| Parameter | Description | Value |
+--------------------------------+--------------------------------------------------+----------+
| num_factors | Factor Dimension | 32 |
| regularization | L2 Regularization on Factors | 1e-09 |
| solver | Solver used for training | adagrad |
| linear_regularization | L2 Regularization on Linear Coefficients | 1e-09 |
| ranking_regularization | Rank-based Regularization Weight | 0.25 |
| max_iterations | Maximum Number of Iterations | 25 |
+--------------------------------+--------------------------------------------------+----------+
Optimizing model using SGD; tuning step size.
Using 118731 / 949852 points for tuning the step size.
+---------+-------------------+------------------------------------------+
| Attempt | Initial Step Size | Estimated Objective Value |
+---------+-------------------+------------------------------------------+
| 0 | 10 | Not Viable |
| 1 | 2.5 | Not Viable |
| 2 | 0.625 | Not Viable |
| 3 | 0.15625 | Not Viable |
| 4 | 0.0390625 | 0.682141 |
| 5 | 0.0195312 | 1.16279 |
| 6 | 0.00976562 | 1.50932 |
+---------+-------------------+------------------------------------------+
| Final | 0.0390625 | 0.682141 |
+---------+-------------------+------------------------------------------+
Starting Optimization.
+---------+--------------+-------------------+-----------------------+-------------+
| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |
+---------+--------------+-------------------+-----------------------+-------------+
| Initial | 74us | 2.44717 | 1.11719 | |
+---------+--------------+-------------------+-----------------------+-------------+
| 1 | 1.16s | 1.31004 | 0.991321 | 0.0390625 |
| 2 | 2.24s | 0.983868 | 0.906376 | 0.0390625 |
| 3 | 3.32s | 0.87771 | 0.869764 | 0.0390625 |
| 4 | 4.41s | 0.826156 | 0.852127 | 0.0390625 |
| 5 | 5.54s | 0.793964 | 0.840965 | 0.0390625 |
| 6 | 6.60s | 0.770374 | 0.832034 | 0.0390625 |
| 7 | 7.72s | 0.751784 | 0.825083 | 0.0390625 |
| 8 | 8.76s | 0.737281 | 0.819622 | 0.0390625 |
| 9 | 9.89s | 0.725296 | 0.814701 | 0.0390625 |
| 10 | 10.93s | 0.714175 | 0.810177 | 0.0390625 |
| 11 | 12.08s | 0.70574 | 0.806509 | 0.0390625 |
| 12 | 13.11s | 0.697265 | 0.80316 | 0.0390625 |
| 13 | 14.34s | 0.689758 | 0.799906 | 0.0390625 |
| 14 | 15.37s | 0.683825 | 0.797263 | 0.0390625 |
| 15 | 16.49s | 0.678044 | 0.794862 | 0.0390625 |
| 16 | 17.68s | 0.67252 | 0.792279 | 0.0390625 |
| 17 | 18.77s | 0.667249 | 0.790149 | 0.0390625 |
| 18 | 19.82s | 0.662575 | 0.788064 | 0.0390625 |
| 19 | 20.83s | 0.658308 | 0.786007 | 0.0390625 |
| 20 | 21.87s | 0.654573 | 0.78429 | 0.0390625 |
| 21 | 22.89s | 0.650925 | 0.782561 | 0.0390625 |
| 22 | 24.06s | 0.647671 | 0.781114 | 0.0390625 |
| 23 | 25.31s | 0.644385 | 0.779579 | 0.0390625 |
| 24 | 26.59s | 0.641323 | 0.778085 | 0.0390625 |
| 25 | 27.79s | 0.638167 | 0.776486 | 0.0390625 |
+---------+--------------+-------------------+-----------------------+-------------+
Optimization Complete: Maximum number of passes through the data reached.
Computing final objective value and training RMSE.
Final objective value: 0.625427
Final training RMSE: 0.768104
m
Class : RankingFactorizationRecommender Schema ------ User ID : user_id Item ID : movie_id Target : rating Additional observation features : 3 User side features : [] Item side features : [] Statistics ---------- Number of observations : 949852 Number of users : 6040 Number of items : 3701 Training summary ---------------- Training time : 33.7974 Model Parameters ---------------- Model class : RankingFactorizationRecommender num_factors : 32 binary_target : 0 side_data_factorization : 1 solver : auto nmf : 0 max_iterations : 25 Regularization Settings ----------------------- regularization : 0.0 regularization_type : normal linear_regularization : 0.0 ranking_regularization : 0.25 unobserved_rating_value : -1.79769313486e+308 num_sampled_negative_examples : 4 ials_confidence_scaling_type : auto ials_confidence_scaling_factor : 1 Optimization Settings --------------------- init_random_sigma : 0.01 sgd_convergence_interval : 4 sgd_convergence_threshold : 0.0 sgd_max_trial_iterations : 5 sgd_sampling_block_size : 131072 sgd_step_adjustment_interval : 4 sgd_step_size : 0.0 sgd_trial_sample_minimum_size : 10000 sgd_trial_sample_proportion : 0.125 step_size_decrease_rate : 0.75 additional_iterations_if_unhealthy : 5 adagrad_momentum_weighting : 0.9 num_tempering_iterations : 4 tempering_regularization_start_value : 0.0 track_exact_loss : 0
# Second model for comparison: item-similarity recommender on the same
# training data, using Pearson similarity.
m2 = graphlab.item_similarity_recommender.create(train_set,
'user_id', 'movie_id', 'rating',
similarity_type='pearson')
Recsys training: model = item_similarity
Warning: Ignoring columns timestamp, title, genre;
To use these columns in scoring predictions, use a model that allows the use of additional features.
Preparing data set.
Data has 949852 observations with 6040 users and 3701 items.
Data prepared in: 0.690296s
Training model from provided data.
Gathering per-item and per-user statistics.
+--------------------------------+------------+
| Elapsed Time (Item Statistics) | % Complete |
+--------------------------------+------------+
| 4.296ms | 16.5 |
| 55.516ms | 100 |
+--------------------------------+------------+
Setting up lookup tables.
Processing data in one pass using dense lookup tables.
+-------------------------------------+------------------+-----------------+
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
+-------------------------------------+------------------+-----------------+
| 85.885ms | 0 | 0 |
| 1.09s | 35 | 1300 |
| 2.09s | 63.5 | 2350 |
| 3.09s | 99.5 | 3689 |
| 3.17s | 100 | 3701 |
+-------------------------------------+------------------+-----------------+
Finalizing lookup tables.
Generating candidate set for working with new users.
Finished training in 3.21611s
m2
Class : ItemSimilarityRecommender Schema ------ User ID : user_id Item ID : movie_id Target : rating Additional observation features : 0 User side features : [] Item side features : [] Statistics ---------- Number of observations : 949852 Number of users : 6040 Number of items : 3701 Training summary ---------------- Training time : 3.2161 Model Parameters ---------------- Model class : ItemSimilarityRecommender threshold : 0.001 similarity_type : pearson training_method : auto Other Settings -------------- degree_approximation_threshold : 4096 sparse_density_estimation_sample_size : 4096 max_data_passes : 4096 target_memory_usage : 8589934592 seed_item_set_size : 50 nearest_neighbors_interaction_proportion_threshold : 0.05 max_item_neighborhood_size : 64
# Evaluate both models on the held-out test_set, sampling half of the users
# (user_sample=.5). train_set is passed as skip_set -- presumably so items a
# user already rated in training are excluded from the recommendations being
# scored; confirm against the compare_models documentation.
result = graphlab.recommender.util.compare_models(test_set,
[m, m2],
user_sample=.5, skip_set=train_set)
compare_models: using 2811 users to estimate model performance PROGRESS: Evaluate model M0
recommendations finished on 1000/2811 queries. users per second: 7788.34
recommendations finished on 2000/2811 queries. users per second: 8209.17
Precision and recall summary statistics by cutoff +--------+-----------------+-----------------+ | cutoff | mean_precision | mean_recall | +--------+-----------------+-----------------+ | 1 | 0.0647456421202 | 0.0055003280382 | | 2 | 0.0585200996087 | 0.0113353035287 | | 3 | 0.0524131388592 | 0.0159704348141 | | 4 | 0.0489149768766 | 0.0202725808447 | | 5 | 0.0452508004269 | 0.0240583282714 | | 6 | 0.0432230522946 | 0.0281284957357 | | 7 | 0.0413680947299 | 0.0315008447446 | | 8 | 0.03962113127 | 0.0342475240871 | | 9 | 0.0383414364204 | 0.0373489084669 | | 10 | 0.0367484880825 | 0.0397919699688 | +--------+-----------------+-----------------+ [10 rows x 3 columns] ('\nOverall RMSE: ', 0.908462581230655) Per User RMSE (best) +---------+-------+------------------+ | user_id | count | rmse | +---------+-------+------------------+ | 1614 | 1 | 0.00313141188165 | +---------+-------+------------------+ [1 rows x 3 columns] Per User RMSE (worst) +---------+-------+---------------+ | user_id | count | rmse | +---------+-------+---------------+ | 4936 | 1 | 4.31876092511 | +---------+-------+---------------+ [1 rows x 3 columns] Per Item RMSE (best) +----------+-------+------------------+ | movie_id | count | rmse | +----------+-------+------------------+ | 3138 | 1 | 0.00103085925747 | +----------+-------+------------------+ [1 rows x 3 columns] Per Item RMSE (worst) +----------+-------+---------------+ | movie_id | count | rmse | +----------+-------+---------------+ | 1455 | 1 | 4.06725567438 | +----------+-------+---------------+ [1 rows x 3 columns] PROGRESS: Evaluate model M1
recommendations finished on 1000/2811 queries. users per second: 21419.2
recommendations finished on 2000/2811 queries. users per second: 22703.3
Precision and recall summary statistics by cutoff +--------+-------------------+-------------------+ | cutoff | mean_precision | mean_recall | +--------+-------------------+-------------------+ | 1 | 0.0 | 0.0 | | 2 | 0.0 | 0.0 | | 3 | 0.0 | 0.0 | | 4 | 8.89363215937e-05 | 5.08207551964e-05 | | 5 | 7.1149057275e-05 | 5.08207551964e-05 | | 6 | 5.92908810625e-05 | 5.08207551964e-05 | | 7 | 5.08207551964e-05 | 5.08207551964e-05 | | 8 | 4.44681607969e-05 | 5.08207551964e-05 | | 9 | 3.95272540417e-05 | 5.08207551964e-05 | | 10 | 3.55745286375e-05 | 5.08207551964e-05 | +--------+-------------------+-------------------+ [10 rows x 3 columns] ('\nOverall RMSE: ', 0.9853701447799256) Per User RMSE (best) +---------+-------+------------------+ | user_id | count | rmse | +---------+-------+------------------+ | 5821 | 1 | 0.00493980551612 | +---------+-------+------------------+ [1 rows x 3 columns] Per User RMSE (worst) +---------+-------+---------------+ | user_id | count | rmse | +---------+-------+---------------+ | 5214 | 2 | 3.28453141022 | +---------+-------+---------------+ [1 rows x 3 columns] Per Item RMSE (best) +----------+-------+------+ | movie_id | count | rmse | +----------+-------+------+ | 977 | 1 | 0.0 | +----------+-------+------+ [1 rows x 3 columns] Per Item RMSE (worst) +----------+-------+------+ | movie_id | count | rmse | +----------+-------+------+ | 572 | 1 | 4.0 | +----------+-------+------+ [1 rows x 3 columns]
# Items most similar to movie_id 1287 according to model m.
m.get_similar_items([1287]) # movie_id is Ben-Hur
movie_id | similar | score | rank |
---|---|---|---|
1287 | 940 | 0.62611323595 | 1 |
1287 | 1291 | 0.578746318817 | 2 |
1287 | 2370 | 0.560160756111 | 3 |
1287 | 3805 | 0.524255812168 | 4 |
1287 | 2905 | 0.523368954659 | 5 |
1287 | 2324 | 0.519006967545 | 6 |
1287 | 2804 | 0.510964155197 | 7 |
1287 | 1214 | 0.496943891048 | 8 |
1287 | 919 | 0.491189420223 | 9 |
1287 | 1198 | 0.490789085627 | 10 |
# Print the documentation for get_similar_items (parameters, returned columns).
help(m.get_similar_items)
Help on method get_similar_items in module graphlab.toolkits.recommender.util: get_similar_items(self, items=None, k=10, verbose=False) method of graphlab.toolkits.recommender.ranking_factorization_recommender.RankingFactorizationRecommender instance Get the k most similar items for each item in items. Each type of recommender has its own model for the similarity between items. For example, the item_similarity_recommender will return the most similar items according to the user-chosen similarity; the factorization_recommender will return the nearest items based on the cosine similarity between latent item factors. Parameters ---------- items : SArray or list; optional An :class:`~graphlab.SArray` or list of item ids for which to get similar items. If 'None', then return the `k` most similar items for all items in the training set. k : int, optional The number of similar items for each item. verbose : bool, optional Progress printing is shown. Returns ------- out : SFrame A SFrame with the top ranked similar items for each item. The columns `item`, 'similar', 'score' and 'rank', where `item` matches the item column name specified at training time. The 'rank' is between 1 and `k` and 'score' gives the similarity score of that item. The value of the score depends on the method used for computing item similarities. Examples -------- >>> sf = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"], 'item_id': ["a", "b", "c", "a", "b", "b", "c", "d"]}) >>> m = graphlab.item_similarity_recommender.create(sf) >>> nn = m.get_similar_items()
Note: in the result of get_similar_items, 'score' gives the similarity score of that item.
# Look up title and genre for each similar item by matching the 'similar'
# column against items' movie_id, then order the rows by rank.
m.get_similar_items([1287]).join(items, on={'similar': 'movie_id'}).sort('rank')
movie_id | similar | score | rank | title | genre |
---|---|---|---|---|---|
1287 | 940 | 0.62611323595 | 1 | Adventures of Robin Hood, The (1938) ... |
Action|Adventure |
1287 | 1291 | 0.578746318817 | 2 | Indiana Jones and the Last Crusade (1989) ... |
Action|Adventure |
1287 | 2370 | 0.560160756111 | 3 | Emerald Forest, The (1985) ... |
Action|Adventure|Drama |
1287 | 3805 | 0.524255812168 | 4 | Knightriders (1981) | Action|Adventure|Drama |
1287 | 2905 | 0.523368954659 | 5 | Sanjuro (1962) | Action|Adventure |
1287 | 2324 | 0.519006967545 | 6 | Life Is Beautiful (La Vita � bella) (1997) ... |
Comedy|Drama |
1287 | 2804 | 0.510964155197 | 7 | Christmas Story, A (1983) | Comedy|Drama |
1287 | 1214 | 0.496943891048 | 8 | Alien (1979) | Action|Horror|Sci- Fi|Thriller ... |
1287 | 919 | 0.491189420223 | 9 | Wizard of Oz, The (1939) | Adventure|Children's|Dram a|Musical ... |
1287 | 1198 | 0.490789085627 | 10 | Raiders of the Lost Ark (1981) ... |
Action|Adventure |
# Generate recommendations with model m using the default arguments
# (no explicit user list or k is passed).
recs = m.recommend()
recommendations finished on 1000/6040 queries. users per second: 7665.01
recommendations finished on 2000/6040 queries. users per second: 7859.16
recommendations finished on 3000/6040 queries. users per second: 7905.28
recommendations finished on 4000/6040 queries. users per second: 8137.29
recommendations finished on 5000/6040 queries. users per second: 8219.12
recommendations finished on 6000/6040 queries. users per second: 7955.95
recs
user_id | movie_id | score | rank |
---|---|---|---|
1 | 34 | 3.9719229951 | 1 |
1 | 590 | 3.92724250028 | 2 |
1 | 1198 | 3.91728875557 | 3 |
1 | 1282 | 3.91183034102 | 4 |
1 | 1682 | 3.90261084358 | 5 |
1 | 356 | 3.89610960106 | 6 |
1 | 1408 | 3.88136133323 | 7 |
1 | 1210 | 3.87818231205 | 8 |
1 | 912 | 3.87265440965 | 9 |
1 | 1393 | 3.87153578973 | 10 |
# Rows of the joined ratings table whose user_id is 4.
data[data['user_id'] == 4]
user_id | movie_id | rating | timestamp | title | genre |
---|---|---|---|---|---|
4 | 3468 | 5 | 978294008 | Hustler, The (1961) | Drama |
4 | 1210 | 3 | 978293924 | Star Wars: Episode VI - Return of the Jedi (1 ... |
Action|Adventure|Romance |Sci-Fi|War ... |
4 | 2951 | 4 | 978294282 | Fistful of Dollars, A (1964) ... |
Action|Western |
4 | 1214 | 4 | 978294260 | Alien (1979) | Action|Horror|Sci- Fi|Thriller ... |
4 | 1036 | 4 | 978294282 | Die Hard (1988) | Action|Thriller |
4 | 260 | 5 | 978294199 | Star Wars: Episode IV - A New Hope (1977) ... |
Action|Adventure|Fantasy |Sci-Fi ... |
4 | 2028 | 5 | 978294230 | Saving Private Ryan (1998) ... |
Action|Drama|War |
4 | 480 | 4 | 978294008 | Jurassic Park (1993) | Action|Adventure|Sci-Fi |
4 | 1196 | 2 | 978294199 | Star Wars: Episode V - The Empire Strikes Back ... |
Action|Adventure|Drama |Sci-Fi|War ... |
4 | 1198 | 5 | 978294199 | Raiders of the Lost Ark (1981) ... |
Action|Adventure |
# Top-20 recommendations for user 4 with title and genre attached.
# The join does not keep the rows in rank order, so sort by rank explicitly.
m.recommend(users=[4], k=20).join(items, on='movie_id').sort('rank')
user_id | movie_id | score | rank | title | genre |
---|---|---|---|---|---|
4 | 527 | 4.10534794144 | 7 | Schindler's List (1993) | Drama|War |
4 | 541 | 4.07768926285 | 9 | Blade Runner (1982) | Film-Noir|Sci-Fi |
4 | 745 | 4.02218672428 | 17 | Close Shave, A (1995) | Animation|Comedy|Thriller |
4 | 750 | 4.31101408861 | 1 | Dr. Strangelove or: How I Learned to Stop Worrying ... |
Sci-Fi|War |
4 | 924 | 4.14404563866 | 5 | 2001: A Space Odyssey (1968) ... |
Drama|Mystery|Sci- Fi|Thriller ... |
4 | 1073 | 4.0288983833 | 15 | Willy Wonka and the Chocolate Factory (1971) ... |
Adventure|Children's|Come dy|Fantasy ... |
4 | 1084 | 4.01916591338 | 19 | Bonnie and Clyde (1967) | Crime|Drama |
4 | 1094 | 4.07409318052 | 10 | Crying Game, The (1992) | Drama|Romance|War |
4 | 1183 | 4.01506134383 | 20 | English Patient, The (1996) ... |
Drama|Romance|War |
4 | 1206 | 4.08579082442 | 8 | Clockwork Orange, A (1971) ... |
Sci-Fi |
# Show the documentation for recommend. The trailing-'?' form only works
# inside IPython; help() works in plain Python as well.
help(m.recommend)
# Build a small table of ratings for user_id 99999, one row per movie_id.
recent_data = graphlab.SFrame()
recent_data['movie_id'] = [30, 1000, 900, 883, 251, 200, 199, 180, 120, 991, 1212]
# A scalar is assigned here, so every row gets the same user_id.
recent_data['user_id'] = 99999
recent_data['rating'] = [2, 1, 3, 4, 0, 0, 1, 1, 1, 2, 3]
recent_data
movie_id | user_id | rating |
---|---|---|
30 | 99999 | 2 |
1000 | 99999 | 1 |
900 | 99999 | 3 |
883 | 99999 | 4 |
251 | 99999 | 0 |
200 | 99999 | 0 |
199 | 99999 | 1 |
180 | 99999 | 1 |
120 | 99999 | 1 |
991 | 99999 | 2 |
# Recommend for user 99999 with the item-similarity model, supplying that
# user's ratings through new_observation_data; then attach title/genre and
# order by rank.
m2.recommend(users=[99999], new_observation_data=recent_data).join(items, on='movie_id').sort('rank')
user_id | movie_id | score | rank | title | genre |
---|---|---|---|---|---|
99999 | 572 | 5.0 | 1 | Foreign Student (1994) | Drama |
99999 | 3881 | 5.0 | 2 | Bittersweet Motel (2000) | Documentary |
99999 | 1830 | 5.0 | 3 | Follow the Bitch (1998) | Comedy |
99999 | 989 | 5.0 | 4 | Schlafes Bruder (Brother of Sleep) (1995) ... |
Drama |
99999 | 3172 | 5.0 | 5 | Ulysses (Ulisse) (1954) | Adventure |
99999 | 3233 | 5.0 | 6 | Smashing Time (1967) | Comedy |
99999 | 3382 | 5.0 | 7 | Song of Freedom (1936) | Drama |
99999 | 787 | 5.0 | 8 | Gate of Heavenly Peace, The (1995) ... |
Documentary |
99999 | 3656 | 5.0 | 9 | Lured (1947) | Crime |
99999 | 3280 | 5.0 | 10 | Baby, The (1973) | Horror |
# Save model m under 'my_model', load it back, and display the loaded model.
m.save('my_model')
m_again = graphlab.load_model('my_model')
m_again