import pandas as pd
import numpy as np
movies = pd.read_csv('ml-25m/movies.csv')
print(movies.shape)
movies.columns
(62423, 3)
Index(['movieId', 'title', 'genres'], dtype='object')
# Keep only movies released in `year` or later. The release year is taken
# from the first "(YYYY)" group found in the title; titles without one get NaN.
year = 2010
movies['year'] = movies.title.str.extract(r'\((\d{4})\)').astype("float")
# NaN compares False against `year`, so movies without a parsable year are
# dropped from `movies` below. They must also be listed in `movie_id_removed`,
# otherwise their ratings survive the ratings filter and point at movies that
# no longer have an entry in the title mapping.
keep_movie = movies.year >= year
movie_id_removed = movies.loc[~keep_movie, 'movieId'].tolist()
movies = movies.loc[keep_movie]
movies
movieId | title | genres | year | |
---|---|---|---|---|
14156 | 73268 | Daybreakers (2010) | Action|Drama|Horror|Thriller | 2010.0 |
14161 | 73319 | Leap Year (2010) | Comedy|Romance | 2010.0 |
14162 | 73321 | Book of Eli, The (2010) | Action|Adventure|Drama | 2010.0 |
14222 | 73744 | If You Love (Jos rakastat) (2010) | Drama|Musical|Romance | 2010.0 |
14256 | 73929 | Legion (2010) | Action|Fantasy|Horror|Thriller | 2010.0 |
... | ... | ... | ... | ... |
62412 | 209143 | The Painting (2019) | Animation|Documentary | 2019.0 |
62413 | 209145 | Liberté (2019) | Drama | 2019.0 |
62415 | 209151 | Mao Zedong 1949 (2019) | (no genres listed) | 2019.0 |
62418 | 209157 | We (2018) | Drama | 2018.0 |
62420 | 209163 | Bad Poems (2018) | Comedy|Drama | 2018.0 |
20489 rows × 4 columns
len(movie_id_removed)
41524
movies_mapping = movies[['movieId','title']].set_index('movieId').to_dict()['title']
Ratings
ratings = pd.read_csv('ml-25m/ratings.csv', parse_dates=['timestamp'])
print(ratings.columns)
print(ratings.shape)
ratings.userId.nunique()
Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')
(25000095, 4)
162541
# Drop the ratings of the filtered-out movies. Take an explicit copy so the
# column assignments below write to an independent frame; assigning on the
# raw result of `query` is flagged by pandas as a possible write to a slice
# (SettingWithCopyWarning).
ratings = ratings.query('movieId not in @movie_id_removed').copy()
# Every remaining rating is treated as one positive implicit interaction.
ratings['rating'] = 1
ratings['timestamp'] = pd.to_datetime(ratings['timestamp'], unit='s')
ratings
userId | movieId | rating | timestamp | |
---|---|---|---|---|
712 | 3 | 73268 | 1 | 2015-08-13 14:11:38 |
713 | 3 | 73321 | 1 | 2015-08-13 13:52:05 |
715 | 3 | 74458 | 1 | 2017-04-21 14:39:18 |
716 | 3 | 74789 | 1 | 2019-08-18 00:59:42 |
717 | 3 | 76077 | 1 | 2017-01-18 16:15:09 |
... | ... | ... | ... | ... |
24999773 | 162538 | 111617 | 1 | 2015-08-05 14:15:09 |
24999774 | 162538 | 112138 | 1 | 2015-08-05 14:14:35 |
24999775 | 162538 | 112556 | 1 | 2015-08-05 14:25:33 |
24999776 | 162538 | 116797 | 1 | 2015-08-05 13:25:21 |
24999777 | 162538 | 126548 | 1 | 2015-08-05 14:24:57 |
2711937 rows × 4 columns
Label Encoder
from sklearn.preprocessing import LabelEncoder
# Re-index users and movies to contiguous integer labels so they can be used
# directly as matrix rows/columns and embedding indices.
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()
ratings['userId'] = user_encoder.fit_transform(ratings.userId)
ratings['movieId'] = movie_encoder.fit_transform(ratings.movieId)
# `classes_` holds the original ids, ordered by encoded label; it is what
# `back_to_normal` indexes into to undo the encoding.
user_encoder.classes_
movie_encoder.classes_
array([ 73268, 73319, 73321, ..., 209151, 209157, 209163])
from scipy.sparse import csr_matrix
np.random.seed(42)
def create_matrix(data, user_col, item_col, rating_col):
    """
    Create the sparse user-item interaction matrix.

    Row and column positions are the category codes of the user and item
    columns, i.e. the rank of each value among the sorted unique values of
    its column. The input frame is left untouched: the categorical view is
    built on temporary copies of the two columns.

    Parameters
    ----------
    data : DataFrame
        implicit rating data (not modified)
    user_col : str
        user column name
    item_col : str
        item column name
    rating_col : str
        implicit rating column name

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix with one row per distinct user and one column per distinct
        item. Repeated (user, item) pairs are summed by the CSR conversion.
    """
    rows = data[user_col].astype('category').cat.codes.to_numpy()
    cols = data[item_col].astype('category').cat.codes.to_numpy()
    rating = data[rating_col].to_numpy()
    return csr_matrix((rating, (rows, cols)))
user_item_matrix = create_matrix(ratings, 'userId', 'movieId', 'rating')
user_item_matrix.shape
(60780, 20455)
Train Test Split
# Leave-one-out split: rank each user's interactions from newest to oldest
# (ascending=False; method='first' breaks timestamp ties by row order), so
# rank 1 is the user's most recent interaction.
ratings['test'] = ratings.groupby(['userId'])['timestamp'].rank(method='first', ascending=False)
# Everything except the most recent interaction goes to training ...
train_ratings = ratings.query('test != 1').drop(columns = ['test', 'timestamp'])
# ... and the most recent interaction of every user is held out for testing.
test_ratings = ratings.query('test == 1').drop(columns = ['test', 'timestamp'])
train_ratings.shape, test_ratings.shape
((2651157, 3), (60780, 3))
Problema de Clasificación
# bla = train_ratings.drop_duplicates(subset = ['userId', 'movieId'], keep = 'first')
# %%time
# unique_movies = set(train_ratings.movieId)
# def create_negative_movies(df, userid = 'userId', movieid = 'movieId',neg_examples = 4):
# unique_movies = set(df[movieid])
# movies = []
# uids = df[userid].unique()
# for u in uids:
# movies.extend(np.random.choice(list(unique_movies - set(df[movieid][df[userid] == u])), size = neg_examples))
# return uids, movies
# %%time
# neg_examples = 4
# users, negative_movies = create_negative_movies(train_ratings)
# negative_movies_df = pd.DataFrame(dict(userId = np.repeat(users, [neg_examples]*len(users)),
# movieId = negative_movies,
# ratings = np.zeros(len(negative_movies)))
# )
# negative_movies_df.to_csv('negative_movies.csv', index = False)
Nueva Implementación
# Number of distinct users and movies present in each split.
print('Training Dimensions: ', train_ratings.userId.nunique(), train_ratings.movieId.nunique())
print('Test Dimensions: ', test_ratings.userId.nunique(), test_ratings.movieId.nunique())
# NOTE(review): despite the "Movies in ..." labels, these print the column-wise
# sum of the frame. Since every rating was set to 1, the `rating` entry is the
# number of interactions in the split, not a count of movies.
print('Movies in Train: ', train_ratings.sum())
print('Movies in Test: ', test_ratings.sum())
Training Dimensions: 56706 20391
Test Dimensions: 60780 4176
Movies in Train: rating 2651157
dtype: int64
Movies in Test: rating 60780
dtype: int64
train_users = train_ratings.userId.unique().tolist()
test_users = test_ratings.userId.unique().tolist()
print(len(train_users))
print(len(test_users))
56706
60780
def create_negative_df(user_ids, user_item, neg_examples = 4, test = False):
    """
    Sample negative (non-interacted) movies for each user.

    Parameters
    ----------
    user_ids : sequence of int
        encoded user ids; each one is used as a row index into ``user_item``
    user_item : scipy.sparse matrix
        user-item interaction matrix (rows are users, columns are movies)
    neg_examples : int
        with ``test=True``, the number of negatives drawn per user;
        otherwise the number of negatives drawn per interaction of the user
    test : bool
        selects between the two meanings of ``neg_examples`` above

    Returns
    -------
    DataFrame
        columns ``userId``, ``movieId`` and ``rating`` (always 0.0), one row
        per sampled negative. Sampling is done with replacement, so the same
        movie may be drawn more than once for a user.
    """
    movies_id = np.arange(user_item.shape[1])
    negative_movies = []
    examples = []
    for user_id in user_ids:
        # Look the row up by the user id itself, not by the position in
        # `user_ids`: the two only coincide when `user_ids` is exactly
        # 0..n-1 in order, which is not the case for a subset of users.
        interacted = user_item[user_id].nonzero()[1]
        candidates = movies_id[~np.isin(movies_id, interacted)]
        if candidates.size == 0:
            # The user interacted with every movie: nothing to sample from.
            examples.append(0)
            continue
        size = neg_examples if test else len(interacted) * neg_examples
        negative_movies.extend(np.random.choice(candidates, size = size))
        examples.append(size)
    negative_movies_df = pd.DataFrame(dict(userId = np.repeat(user_ids, examples),
                                           movieId = negative_movies,
                                           rating = np.zeros(len(negative_movies))))
    return negative_movies_df
%%time
train_negative_movies_df = create_negative_df(train_users, user_item_matrix, neg_examples = 4)
train_negative_movies_df.shape
CPU times: user 22.9 s, sys: 268 ms, total: 23.1 s
Wall time: 23.1 s
(10146692, 3)
%%time
test_negative_movies_df = create_negative_df(test_users, user_item_matrix, neg_examples = 99, test = True)
test_negative_movies_df.shape
CPU times: user 24.3 s, sys: 160 ms, total: 24.5 s
Wall time: 24.5 s
(6017220, 3)
# Stack the observed positives on top of the sampled negatives.
# DataFrame.append is deprecated (and removed in pandas 2.0); pd.concat is the
# supported equivalent. Row labels of both inputs are kept, as before.
full_training_df = pd.concat([train_ratings, train_negative_movies_df])
full_test_df = pd.concat([test_ratings, test_negative_movies_df])
full_training_df.shape, full_test_df.shape
((12797849, 3), (6078000, 3))
full_training_df.info(memory_usage='deep'), full_test_df.info(memory_usage='deep')
<class 'pandas.core.frame.DataFrame'>
Int64Index: 12797849 entries, 712 to 10146691
Data columns (total 3 columns):
# Column Dtype
--- ------ -----
0 userId int64
1 movieId int64
2 rating float64
dtypes: float64(1), int64(2)
memory usage: 390.6 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6078000 entries, 734 to 6017219
Data columns (total 3 columns):
# Column Dtype
--- ------ -----
0 userId int64
1 movieId int64
2 rating float64
dtypes: float64(1), int64(2)
memory usage: 185.5 MB
(None, None)
full_training_df.columns
Index(['userId', 'movieId', 'rating'], dtype='object')
Creating the Neural Network
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
pl.seed_everything(42, workers=True)
Global seed set to 42
42
from torch.utils.data import Dataset, DataLoader
from multiprocessing import cpu_count
class MovieData(Dataset):
    """Map-style dataset of (user, movie, rating) triples.

    The three inputs are converted to tensors once, at construction time, so
    that fetching a sample is a plain tensor index instead of a pandas
    ``.iloc`` lookup followed by a tensor allocation for every sample.

    Parameters
    ----------
    users, movies : array-like of int
        encoded user / movie ids (pandas Series, numpy arrays or lists)
    ratings : array-like of float
        target for each (user, movie) pair

    Raises
    ------
    ValueError
        if the three inputs do not have the same length
    """

    def __init__(self, users, movies, ratings):
        if not (len(users) == len(movies) == len(ratings)):
            raise ValueError('users, movies and ratings must have the same length')
        # torch.tensor copies, so the dataset does not alias the source frame.
        self.users = torch.tensor(np.asarray(users), dtype=torch.long)
        self.movies = torch.tensor(np.asarray(movies), dtype=torch.long)
        self.ratings = torch.tensor(np.asarray(ratings), dtype=torch.float)

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        # Samples are addressed by position, independent of any pandas index
        # the inputs may have carried.
        return dict(
            users = self.users[idx],
            movies = self.movies[idx],
            ratings = self.ratings[idx]
        )
class MovieDataModule(pl.LightningDataModule):
    """Lightning data module wrapping the train and test interaction frames.

    Parameters
    ----------
    train_df, test_df : DataFrame
        frames exposing ``userId``, ``movieId`` and ``rating`` columns
    batch_size : int
        batch size used by both dataloaders
    num_workers : int
        number of DataLoader worker processes; defaults to 10, the value that
        was previously hard-coded. Pass 0 to load in the main process.
    """

    def __init__(self, train_df, test_df, batch_size = 512, num_workers = 10):
        super().__init__()
        self.train_df = train_df
        self.test_df = test_df
        self.batch_size = batch_size
        self.num_workers = num_workers

    def setup(self, stage=None):
        # Build both datasets regardless of `stage`.
        self.train_data = MovieData(self.train_df.userId, self.train_df.movieId, self.train_df.rating)
        self.test_data = MovieData(self.test_df.userId, self.test_df.movieId, self.test_df.rating)

    def train_dataloader(self):
        return DataLoader(self.train_data, batch_size=self.batch_size, shuffle=True,
                          pin_memory=True, num_workers=self.num_workers)

    def test_dataloader(self):
        # Not shuffled: predictions must stay aligned with the rows of test_df.
        return DataLoader(self.test_data, batch_size=self.batch_size, shuffle=False,
                          pin_memory=True, num_workers=self.num_workers)
class NCF(nn.Module):
    """Embedding + MLP scorer for (user, movie) pairs.

    User and movie ids are embedded, the two embeddings are concatenated and
    passed through a ReLU MLP that ends in a linear layer with ``n_out``
    outputs (no final activation).

    Parameters
    ----------
    dim_users : int
        number of rows in the user embedding table
    dim_movies : int
        number of rows in the movie embedding table
    n_out : int
        number of outputs of the final linear layer
    embedding_dim : int
        size of each user / movie embedding (default 8)
    hidden_dims : sequence of int
        widths of the hidden layers (default ``(64, 32)``)

    With the default arguments the layers, their order and their
    ``state_dict`` keys are the same as with the previously hard-coded sizes.
    """

    def __init__(self, dim_users, dim_movies, n_out = 1, embedding_dim = 8, hidden_dims = (64, 32)):
        super().__init__()
        self.user_embedding = nn.Embedding(dim_users, embedding_dim)
        self.movie_embedding = nn.Embedding(dim_movies, embedding_dim)
        layers = []
        in_features = 2 * embedding_dim  # user and movie embeddings are concatenated
        for width in hidden_dims:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU(inplace=True))
            in_features = width
        layers.append(nn.Linear(in_features, n_out))
        self.encoder = nn.Sequential(*layers)

    def forward(self, users, movies):
        user_emb = self.user_embedding(users)
        movie_emb = self.movie_embedding(movies)
        x = torch.cat((user_emb, movie_emb), dim = 1)
        x = self.encoder(x)
        return x
class RecSys(pl.LightningModule):
    """Lightning wrapper that trains a (users, movies) -> logits model with
    binary cross-entropy on logits and an Adam optimizer."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.criterion = nn.BCEWithLogitsLoss()

    def forward(self, users, movies):
        return self.model(users, movies)

    def training_step(self, batch, batch_idx):
        logits = self(batch['users'], batch['movies'])
        # Targets arrive as a flat vector; reshape to a column before the loss.
        targets = batch['ratings'].view(-1, 1)
        loss = self.criterion(logits, targets)
        self.log('train_loss', loss, prog_bar = True, logger = True)
        return {'loss': loss}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr = 1e-3)
        return optimizer
full_test_df.userId.astype('int64').max(), full_test_df.movieId.astype('int64').max(), full_test_df.shape
(60779, 20454, (6078000, 3))
# Size the embedding tables from the encoders' vocabularies instead of the
# largest id seen in the training frame: a user or movie that occurs only in
# the test split would otherwise index past the end of its table.
dim_users = len(user_encoder.classes_)
dim_movies = len(movie_encoder.classes_)
print(dim_users, dim_movies)
60780 20455
model = NCF(dim_users, dim_movies)
dm = MovieDataModule(full_training_df, full_test_df, batch_size=512)
dm.setup()
train_batch = next(iter(dm.train_dataloader()))
value = np.random.randint(1,32)
train_batch['users'][value], train_batch['movies'][value], train_batch['ratings'][value]
(tensor(55545), tensor(15787), tensor(0.))
train_batch['users'].shape, train_batch['movies'].shape, train_batch['ratings'].shape
(torch.Size([512]), torch.Size([512]), torch.Size([512]))
recommender = RecSys(model)
recommender(train_batch['users'], train_batch['movies']).shape
torch.Size([512, 1])
# Keep the checkpoint with the lowest logged 'train_loss', plus the last one.
mc = ModelCheckpoint(
    dirpath = 'checkpoints',
    #filename = 'best-checkpoint',
    save_last = True,
    save_top_k = 1,
    verbose = True,
    monitor = 'train_loss',
    mode = 'min'
)
mc.CHECKPOINT_NAME_LAST = 'best-checkpoint-latest'
# `Trainer(progress_bar_refresh_rate=...)` is deprecated since Lightning 1.5
# and removed in 1.7; the refresh rate is now set on the progress-bar callback.
trainer = pl.Trainer(max_epochs=5,
                     accelerator="gpu",
                     devices=1,
                     callbacks=[mc, pl.callbacks.TQDMProgressBar(refresh_rate=30)],
                     # fast_dev_run=True,
                     #overfit_batches=1
                     )
trainer.fit(recommender, dm)
/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/pytorch_lightning/trainer/connectors/callback_connector.py:97: LightningDeprecationWarning: Setting `Trainer(progress_bar_refresh_rate=30)` is deprecated in v1.5 and will be removed in v1.7. Please pass `pytorch_lightning.callbacks.progress.TQDMProgressBar` with `refresh_rate` directly to the Trainer's `callbacks` argument instead. Or, to disable the progress bar pass `enable_progress_bar = False` to the Trainer.
f"Setting `Trainer(progress_bar_refresh_rate={progress_bar_refresh_rate})` is deprecated in v1.5 and"
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:608: UserWarning: Checkpoint directory /home/alfonso/Documents/kaggle/recom/checkpoints exists and is not empty.
rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
| Name | Type | Params
------------------------------------------------
0 | model | NCF | 653 K
1 | criterion | BCEWithLogitsLoss | 0
------------------------------------------------
653 K Trainable params
0 Non-trainable params
653 K Total params
2.612 Total estimated model params size (MB)
Training: 0it [00:00, ?it/s]
Epoch 0, global step 24996: 'train_loss' reached 0.08028 (best 0.08028), saving model to '/home/alfonso/Documents/kaggle/recom/checkpoints/epoch=0-step=24996.ckpt' as top 1
Epoch 1, global step 49992: 'train_loss' was not in top 1
Epoch 2, global step 74988: 'train_loss' reached 0.07823 (best 0.07823), saving model to '/home/alfonso/Documents/kaggle/recom/checkpoints/epoch=2-step=74988.ckpt' as top 1
Epoch 3, global step 99984: 'train_loss' reached 0.06737 (best 0.06737), saving model to '/home/alfonso/Documents/kaggle/recom/checkpoints/epoch=3-step=99984.ckpt' as top 1
Epoch 4, global step 124980: 'train_loss' reached 0.06487 (best 0.06487), saving model to '/home/alfonso/Documents/kaggle/recom/checkpoints/epoch=4-step=124980.ckpt' as top 1
# from torchmetrics.functional import retrieval_hit_rate
# preds = torch.tensor([[0.9, 0.3, 0.9,0.4],[0.9, 0.3, 0.9,0.4]])
# target = torch.tensor([[False, False, True,False],[False, True, False,False]])
# retrieval_hit_rate(preds, target, k=2)
@torch.inference_mode()
def predict(model, dm):
    """Score every batch of ``dm.test_dataloader()`` with ``model``.

    Parameters
    ----------
    model : torch.nn.Module
        callable as ``model(users, movies)`` and returning logits
    dm : object
        provides ``test_dataloader()``, yielding dicts with ``'users'`` and
        ``'movies'`` tensors

    Returns
    -------
    list of numpy.ndarray
        one entry per test row, in dataloader order, holding the sigmoid of
        the model output for that row

    The model is switched to eval mode and left in it.
    """
    model.eval()
    # Run on whatever device the model lives on; without moving the batch,
    # a model left on the GPU fails with a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device
    preds = []
    for item in dm.test_dataloader():
        users, movies = item['users'], item['movies']
        if device is not None:
            users, movies = users.to(device), movies.to(device)
        pred = torch.sigmoid(model(users, movies))
        preds.extend(pred.cpu().detach().numpy())
    return preds
predictions= np.array(predict(recommender, dm))
predictions.shape
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fdcf8434170><function _MultiProcessingDataLoaderIter.__del__ at 0x7fdcf8434170>
Traceback (most recent call last):
Traceback (most recent call last):
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
self._shutdown_workers()
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
if w.is_alive():Exception ignored in:
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
<function _MultiProcessingDataLoaderIter.__del__ at 0x7fdcf8434170>
Traceback (most recent call last):
assert self._parent_pid == os.getpid(), 'can only test a child process' File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1358, in __del__
AssertionError self._shutdown_workers():
can only test a child process File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
if w.is_alive():
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionErrorself._shutdown_workers():
can only test a child process
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1341, in _shutdown_workers
if w.is_alive():
File "/home/alfonso/miniconda3/envs/dl/lib/python3.7/multiprocessing/process.py", line 151, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
(6078000, 1)
full_test_df['preds'] = predictions
full_test_df
userId | movieId | rating | preds | |
---|---|---|---|---|
734 | 0 | 230 | 1.0 | 0.987030 |
1066 | 1 | 929 | 1.0 | 0.749257 |
2855 | 2 | 465 | 1.0 | 0.911151 |
2889 | 3 | 2505 | 1.0 | 0.973490 |
3015 | 4 | 9907 | 1.0 | 0.959640 |
... | ... | ... | ... | ... |
6017215 | 60779 | 2442 | 0.0 | 0.006992 |
6017216 | 60779 | 10800 | 0.0 | 0.003167 |
6017217 | 60779 | 17767 | 0.0 | 0.000137 |
6017218 | 60779 | 7073 | 0.0 | 0.000070 |
6017219 | 60779 | 2124 | 0.0 | 0.000730 |
6078000 rows × 4 columns
# For every user keep the 10 candidates with the highest predicted score
# (sorted by user, then by descending prediction).
recomendations = full_test_df.sort_values(by = ['userId','preds'], ascending=[True, False]).groupby('userId').head(10)
# Hit Ratio @ 10: positives (rating == 1) that made it into the top 10,
# divided by the number of users.
recomendations.rating.sum()/recomendations.userId.nunique()
0.9457880881869036
Índices Iniciales
def back_to_normal(df, user_encoder, movie_encoder, movies_mapping):
    """Undo the label encoding of ``df`` for display.

    Encoded user ids are replaced by the matching entries of
    ``user_encoder.classes_``. Encoded movie ids are first mapped back through
    ``movie_encoder.classes_`` and then through ``movies_mapping``, so the
    ``movieId`` column of the result holds the mapped values (NaN where the
    mapping has no entry). The result has a fresh 0..n-1 index and only the
    ``userId``, ``movieId`` and ``rating`` columns.
    """
    original_users = user_encoder.classes_[df.userId.tolist()]
    original_movies = pd.Series(movie_encoder.classes_[df.movieId.tolist()])
    decoded = {
        'userId': original_users,
        'movieId': original_movies.map(movies_mapping),
        'rating': df.rating.tolist(),
    }
    return pd.DataFrame(decoded)
visto= back_to_normal(train_ratings, user_encoder, movie_encoder, movies_mapping)
visto.shape
(2651157, 3)
recomendar = back_to_normal(recomendations, user_encoder, movie_encoder, movies_mapping)
recomendar.shape
(607800, 3)
user = 4
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
193 Shutter Island (2010)
194 Percy Jackson & the Olympians: The Lightning T...
195 How to Train Your Dragon (2010)
196 Clash of the Titans (2010)
197 Iron Man 2 (2010)
...
303 Spider-Man: Into the Spider-Verse (2018)
304 John Wick: Chapter 3 – Parabellum (2019)
305 Pokémon: Detective Pikachu (2019)
306 Ford v. Ferrari (2019)
307 Fast & Furious Presents: Hobbs & Shaw (2019)
Name: movieId, Length: 115, dtype: object
userId | movieId | rating | |
---|---|---|---|
10 | 4 | Thor: The Dark World (2013) | 0.0 |
11 | 4 | Margin Call (2011) | 0.0 |
12 | 4 | Kubo and the Two Strings (2016) | 0.0 |
13 | 4 | John Carter (2012) | 1.0 |
14 | 4 | Autómata (Automata) (2014) | 0.0 |
15 | 4 | You Were Never Really Here (2017) | 0.0 |
16 | 4 | Aloha (2015) | 0.0 |
17 | 4 | Thanks for Sharing (2012) | 0.0 |
18 | 4 | Eva (2011) | 0.0 |
19 | 4 | Magic Mike XXL (2015) | 0.0 |
user = 6265
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
100326 Cabin in the Woods, The (2012)
100327 Snowpiercer (2013)
100328 Gone Girl (2014)
100329 The Imitation Game (2014)
Name: movieId, dtype: object
userId | movieId | rating | |
---|---|---|---|
22630 | 6265 | Midnight in Paris (2011) | 1.0 |
22631 | 6265 | Friends with Benefits (2011) | 0.0 |
22632 | 6265 | Saw VII 3D - The Final Chapter (2010) | 0.0 |
22633 | 6265 | Searching (2018) | 0.0 |
22634 | 6265 | Aladdin (2019) | 0.0 |
22635 | 6265 | The Dark Tower (2017) | 0.0 |
22636 | 6265 | The BFG (2016) | 0.0 |
22637 | 6265 | ARQ (2016) | 0.0 |
22638 | 6265 | A Wrinkle in Time (2018) | 0.0 |
22639 | 6265 | Magic of Belle Isle, The (2012) | 0.0 |
user = 21962
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
353218 Shutter Island (2010)
353219 Alice in Wonderland (2010)
353220 Toy Story 3 (2010)
353221 Shrek Forever After (a.k.a. Shrek: The Final C...
Name: movieId, dtype: object
userId | movieId | rating | |
---|---|---|---|
82220 | 21962 | Iron Man 2 (2010) | 1.0 |
82221 | 21962 | Alien: Covenant (2017) | 0.0 |
82222 | 21962 | The Huntsman Winter's War (2016) | 0.0 |
82223 | 21962 | Sisters (2015) | 0.0 |
82224 | 21962 | Scary Movie 5 (Scary MoVie) (2013) | 0.0 |
82225 | 21962 | Cop Car (2015) | 0.0 |
82226 | 21962 | Norwegian Wood (Noruwei no mori) (2010) | 0.0 |
82227 | 21962 | Oslo, August 31st (Oslo, 31. august) (2011) | 0.0 |
82228 | 21962 | Country Strong (2010) | 0.0 |
82229 | 21962 | Premature (2014) | 0.0 |
user = 17568
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
279619 Inception (2010)
279620 Easy A (2010)
279621 Men in Black III (M.III.B.) (M.I.B.³) (2012)
279622 Ted (2012)
279623 Cloud Atlas (2012)
279624 Django Unchained (2012)
279625 Elysium (2013)
279626 Wolf of Wall Street, The (2013)
279627 The Lego Movie (2014)
279628 Birdman: Or (The Unexpected Virtue of Ignoranc...
279629 Deadpool (2016)
279630 Big Short, The (2015)
Name: movieId, dtype: object
userId | movieId | rating | |
---|---|---|---|
65420 | 17568 | Dark Knight Rises, The (2012) | 1.0 |
65421 | 17568 | Star Trek Into Darkness (2013) | 0.0 |
65422 | 17568 | Star Trek Into Darkness (2013) | 0.0 |
65423 | 17568 | Furious 7 (2015) | 0.0 |
65424 | 17568 | Beasts of the Southern Wild (2012) | 0.0 |
65425 | 17568 | Stonehearst Asylum (2014) | 0.0 |
65426 | 17568 | The Purge: Election Year (2016) | 0.0 |
65427 | 17568 | Creed II (2018) | 0.0 |
65428 | 17568 | Danny Collins (2015) | 0.0 |
65429 | 17568 | Max Steel (2016) | 0.0 |
user = 63
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
623 Easy A (2010)
624 Tangled (2010)
625 Bridesmaids (2011)
626 Horrible Bosses (2011)
627 Crazy, Stupid, Love. (2011)
628 21 Jump Street (2012)
629 Pitch Perfect (2012)
630 Perks of Being a Wallflower, The (2012)
631 Great Gatsby, The (2013)
632 Now You See Me (2013)
633 We're the Millers (2013)
634 About Time (2013)
635 Wolf of Wall Street, The (2013)
636 Gone Girl (2014)
637 Inside Out (2015)
638 Room (2015)
639 Moana (2016)
640 Coco (2017)
Name: movieId, dtype: object
userId | movieId | rating | |
---|---|---|---|
200 | 63 | Spotlight (2015) | 0.0 |
201 | 63 | Twilight Saga: Eclipse, The (2010) | 1.0 |
202 | 63 | Sorcerer's Apprentice, The (2010) | 0.0 |
203 | 63 | Melancholia (2011) | 0.0 |
204 | 63 | Oz the Great and Powerful (2013) | 0.0 |
205 | 63 | Venom (2018) | 0.0 |
206 | 63 | Selma (2014) | 0.0 |
207 | 63 | Burlesque (2010) | 0.0 |
208 | 63 | Silent Hill: Revelation 3D (2012) | 0.0 |
209 | 63 | Double, The (2011) | 0.0 |
user = 162532
print(visto.query('userId == @user')['movieId'])
recomendar.query('userId == @user')
2650878 How to Train Your Dragon (2010)
2650879 Kick-Ass (2010)
2650880 Exit Through the Gift Shop (2010)
2650881 Iron Man 2 (2010)
2650882 Despicable Me (2010)
2650883 Inception (2010)
2650884 Scott Pilgrim vs. the World (2010)
2650885 Social Network, The (2010)
2650886 Easy A (2010)
2650887 Harry Potter and the Deathly Hallows: Part 1 (...
2650888 King's Speech, The (2010)
2650889 Source Code (2011)
2650890 Thor (2011)
2650891 X-Men: First Class (2011)
2650892 Harry Potter and the Deathly Hallows: Part 2 (...
2650893 Captain America: The First Avenger (2011)
2650894 Avengers, The (2012)
2650895 Hugo (2011)
2650896 The Hunger Games (2012)
2650897 Dark Knight Rises, The (2012)
2650898 Sherlock Holmes: A Game of Shadows (2011)
2650899 Intouchables (2011)
2650900 Looper (2012)
2650901 Argo (2012)
2650902 Silver Linings Playbook (2012)
2650903 Hobbit: An Unexpected Journey, The (2012)
2650904 Iron Man 3 (2013)
Name: movieId, dtype: object
userId | movieId | rating | |
---|---|---|---|
607750 | 162532 | Guardians of the Galaxy (2014) | 1.0 |
607751 | 162532 | Only the Brave (2017) | 0.0 |
607752 | 162532 | Immigrant, The (2013) | 0.0 |
607753 | 162532 | Diary of a Wimpy Kid: Rodrick Rules (2011) | 0.0 |
607754 | 162532 | Spy Kids: All the Time in the World in 4D (2011) | 0.0 |
607755 | 162532 | The Belko Experiment (2017) | 0.0 |
607756 | 162532 | All the Way (2016) | 0.0 |
607757 | 162532 | Come Together (2016) | 0.0 |
607758 | 162532 | Batman: Gotham by Gaslight (2018) | 0.0 |
607759 | 162532 | Kizumonogatari Part 1: Tekketsu (2016) | 0.0 |