Skip main navigation

Hurry, only 9 days left to get one year of Unlimited learning for £174.99 (was £249.99). New subscribers only. T&Cs apply.

Find out more

A recommender using deep learning in Python

A recommender using deep learning in Python

Run and modify the following deep learning recommender and share your experiences with others.

from google.colab import drive
import os
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import re
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Mount Google Drive. The mount is skipped when the mount point already
# exists, e.g. when this cell is run a second time.
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# Change into the project folder. An absolute path is used so the cell can
# be re-run safely: a relative 'drive/...' path only resolves while the
# working directory is still the parent of the mount point.
os.chdir('/content/drive/MyDrive/recommender-system/code&dataset')

# Read the two CSV files from the current working directory. Only the three
# rating columns that are used later are loaded from ratings.csv.
movies = pd.read_csv('movies.csv')
ratings = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'])

# One row per rating, enriched with the columns of the rated movie.
data = ratings.merge(movies, on='movieId')

# Collect every distinct genre label (the 'genres' column holds
# '|'-separated labels) as an alphabetically sorted list.
genres = sorted(set().union(*data.genres.str.split('|').values))

# Append one 0/1 indicator column per genre to the merged frame.
genre_dummies = data['genres'].str.get_dummies().astype(int)
data = pd.concat([data, genre_dummies], axis=1)

# Split the data into training and testing sets (80/20). The fixed
# random_state makes the split reproducible between runs.
train, test = train_test_split(data, test_size=0.2, random_state=42)

# Convert genres into binary vectors. The binarizer learns its label
# vocabulary from every row of `data`, so the train and test encodings share
# the same columns. Only fit() is called here: the transformed output for the
# full frame was never used, so building it was wasted work.
mlb = MultiLabelBinarizer()
mlb.fit(data['genres'].str.split('|'))
genres_array = mlb.transform(train['genres'].str.split('|'))
test_genres_array = mlb.transform(test['genres'].str.split('|'))

# Feed-forward regressor: three ReLU hidden layers (64 -> 32 -> 16 units)
# followed by a single linear output unit that predicts the rating. The
# input width is the length of one encoded genre vector.
model = Sequential([
    Dense(units=64, input_shape=(len(genres_array[0]),), activation='relu'),
    Dense(units=32, activation='relu'),
    Dense(units=16, activation='relu'),
    Dense(units=1, activation='linear'),
])

# Mean squared error loss, optimised with Adam.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Fit on the training split; the test split is passed as validation data so
# its loss is reported after every epoch.
model.fit(
    genres_array,
    train['rating'],
    validation_data=(test_genres_array, test['rating']),
    epochs=10,
    batch_size=128,
)

# Get a sorted list of all genres in the dataset. A list is used instead of
# a set because the value is later used to select DataFrame columns, which
# needs an ordered, list-like indexer; sorting makes that column order
# deterministic.
# NOTE(review): presumably this should match the column order the model was
# trained on -- confirm against the binarizer's classes.
genres = sorted(set('|'.join(movies['genres']).split('|')))

def recommend_movies(userId, topN):
    """Return the titles of the best-predicted movies a user has not rated.

    Relies on the module-level ``data`` (merged ratings/movies frame),
    ``movies`` (movie catalogue), ``mlb`` (fitted genre binarizer) and
    ``model`` (trained rating regressor).

    Args:
        userId: Id of the user to recommend for, as found in ``data.userId``.
        topN: Maximum number of titles to return.

    Returns:
        A NumPy array of movie titles ordered from highest to lowest
        predicted rating. It holds fewer than ``topN`` entries (possibly
        none) when the user has fewer unrated movies than that.
    """
    # Movies already rated by *this* user. The lookup must use the userId
    # argument; a fixed id would give every caller the same candidates.
    seen_movie_ids = data.loc[data.userId == userId, 'movieId']
    user_unseen_movies = movies[~movies['movieId'].isin(seen_movie_ids)].copy()

    # Nothing left to score: return an empty title array rather than
    # handing the model an empty batch.
    if user_unseen_movies.empty:
        return user_unseen_movies['title'].values

    # Encode genres with the same fitted binarizer that produced the
    # training features, so the columns and their order match what the model
    # was trained on even if some genre is absent from the unseen subset.
    features = mlb.transform(user_unseen_movies['genres'].str.split('|'))

    # predict() yields one row per movie with a single output column;
    # flatten it to a 1-D vector before storing it as a column.
    user_unseen_movies['rating'] = model.predict(features).ravel()

    user_unseen_movies = user_unseen_movies.sort_values(by='rating', ascending=False)
    return user_unseen_movies.head(topN)['title'].values

This article is from the free online course:

Recommender Systems in Python

Created by
FutureLearn - Learning For Life

Reach your personal and professional goals

Unlock access to hundreds of expert online courses and degrees from top universities and educators to gain accredited qualifications and professional CV-building certificates.

Join over 18 million learners to launch, switch or build upon your career, all at your own pace, across a wide range of topic areas.

Start Learning now