#### NOTEBOOK DESCRIPTION

```python
from datetime import datetime

NOTEBOOK_TITLE = 'taruma_udemy_autoencoders'
NOTEBOOK_VERSION = '1.0.0'
NOTEBOOK_DATE = 1  # Set to 1 to append a UTC date stamp to the project name

NOTEBOOK_NAME = "{}_{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_')
)

PROJECT_NAME = "{}_{}{}".format(
    NOTEBOOK_TITLE,
    NOTEBOOK_VERSION.replace('.', '_'),
    "_" + datetime.utcnow().strftime("%Y%m%d_%H%M") if NOTEBOOK_DATE else ""
)

print(f"Notebook name: {NOTEBOOK_NAME}")
print(f"Project name: {PROJECT_NAME}")
```

#### System Version

```python
import sys, torch
print("Python version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
```

#### Load Notebook Extensions

```python
%load_ext google.colab.data_table
```

#### Download dataset

```python
# ref: https://grouplens.org/datasets/movielens/
!wget -O autoencoders.zip "https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P16-AutoEncoders.zip"
!unzip autoencoders.zip

# There is another .zip file inside the extracted directory, so it has to be
# extracted as well.
# ref: https://askubuntu.com/q/399951
# ref: https://unix.stackexchange.com/q/12902
!find AutoEncoders -type f -name '*.zip' -exec unzip -d AutoEncoders {} \;
```

#### Set dataset path

```python
DATASET_DIRECTORY = 'AutoEncoders/'

def showdata(dataframe):
    print('Dataframe Size: {}'.format(dataframe.shape))
    return dataframe
```

```python
# Importing the libraries
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable  # legacy wrapper; a no-op in modern PyTorch
```

```python
movies = pd.read_csv(DATASET_DIRECTORY + 'ml-1m/movies.dat', sep='::',
                     header=None, engine='python', encoding='latin-1')
showdata(movies).head(10)
```

```python
users = pd.read_csv(DATASET_DIRECTORY + 'ml-1m/users.dat', sep='::',
                    header=None, engine='python', encoding='latin-1')
showdata(users).head(10)
```

```python
ratings = pd.read_csv(DATASET_DIRECTORY + 'ml-1m/ratings.dat', sep='::',
                      header=None, engine='python', encoding='latin-1')
showdata(ratings).head(10)
```

```python
# Preparing the training set and the test set
training_set = pd.read_csv(DATASET_DIRECTORY + 'ml-100k/u1.base', delimiter='\t')
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv(DATASET_DIRECTORY + 'ml-100k/u1.test', delimiter='\t')
test_set = np.array(test_set, dtype='int')

# Getting the number of users and movies
nb_users = int(max(max(training_set[:, 0]), max(test_set[:, 0])))
nb_movies = int(max(max(training_set[:, 1]), max(test_set[:, 1])))
```

```python
# Converting the data into an array with users in lines and movies in columns
def convert(data):
    new_data = []
    for id_users in range(1, nb_users + 1):
        id_movies = data[:, 1][data[:, 0] == id_users]
        id_ratings = data[:, 2][data[:, 0] == id_users]
        ratings = np.zeros(nb_movies)
        ratings[id_movies - 1] = id_ratings  # movie IDs are 1-based; 0 means "not rated"
        new_data.append(list(ratings))
    return new_data

training_set = convert(training_set)
test_set = convert(test_set)
```

```python
# Converting the data into Torch tensors
training_set = torch.FloatTensor(training_set)
test_set = torch.FloatTensor(test_set)
training_set
```

```python
# Creating the architecture of the Neural Network
class SAE(nn.Module):
    def __init__(self):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)  # encoder: nb_movies -> 20
        self.fc2 = nn.Linear(20, 10)         # encoder: 20 -> 10 (bottleneck)
        self.fc3 = nn.Linear(10, 20)         # decoder: 10 -> 20
        self.fc4 = nn.Linear(20, nb_movies)  # decoder: 20 -> nb_movies
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)  # no activation on the output layer (raw predicted ratings)
        return x

sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr=0.01, weight_decay=0.5)
```
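Before training, a quick sanity check (not part of the original notebook, and safe to skip) confirms that the untrained autoencoder maps a user's rating vector back to a vector of the same length. Only names defined in the cells above (`sae`, `training_set`) are used; the variable names here are illustrative.

```python
# Sanity-check sketch: feed the first user's ratings through the untrained SAE.
sample = training_set[0].unsqueeze(0)       # shape: (1, nb_movies)
with torch.no_grad():                       # no gradients needed for a shape check
    reconstruction = sae(sample)
print(sample.shape, reconstruction.shape)   # both should be (1, nb_movies)
```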
```python
# Training the SAE
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0:  # skip users with no ratings
            output = sae(input)
            target.requires_grad = False    # the target is data, not a parameter
            output[target == 0] = 0         # unrated movies must not contribute to the loss
            loss = criterion(output, target)
            # Rescale the loss to count only the movies the user actually rated
            mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
            optimizer.zero_grad()           # clear gradients left over from the previous user
            loss.backward()
            train_loss += np.sqrt(loss.item() * mean_corrector)
            s += 1.
            optimizer.step()
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))
```

```python
# Testing the SAE
test_loss = 0
s = 0.
for id_user in range(nb_users):
    input = Variable(training_set[id_user]).unsqueeze(0)  # predict from the training ratings
    target = Variable(test_set[id_user]).unsqueeze(0)     # evaluate against the held-out ratings
    if torch.sum(target.data > 0) > 0:
        output = sae(input)
        target.requires_grad = False
        output[target == 0] = 0
        loss = criterion(output, target)
        mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
        test_loss += np.sqrt(loss.item() * mean_corrector)
        s += 1.
print('test loss: ' + str(test_loss / s))
```
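Once training is done, the reconstructed ratings can be used as predictions. The sketch below is not part of the original notebook: it ranks the movies one user has not rated yet by the SAE's predicted rating, using `torch.topk`. The names `user_id` and `top_n` are hypothetical example values; `sae`, `training_set`, and `nb_movies` come from the cells above.

```python
# Illustrative sketch: top-N recommendations for one user from the trained SAE.
user_id = 0   # hypothetical example user (0-based row index)
top_n = 10

with torch.no_grad():
    predicted = sae(training_set[user_id].unsqueeze(0)).squeeze(0)

# Exclude movies the user has already rated so only new movies are ranked.
predicted[training_set[user_id] > 0] = -float('inf')

scores, indices = torch.topk(predicted, top_n)
for score, idx in zip(scores, indices):
    # Column index -> 1-based ml-100k movie ID. Titles would have to be looked
    # up in ml-100k's u.item file; the movies.dat loaded above belongs to ml-1m
    # and uses different IDs.
    print('movie {}: predicted rating {:.2f}'.format(idx.item() + 1, score.item()))
```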