Skip main navigation

A User-Based CF Recommender in Python

A User-Based CF Recommender in Python
Run and modify the following code and share your experiences.

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
pd.set_option('display.width', None)

# Step 0a: read only the columns that are used below from each CSV file.
ratings = pd.read_csv('ratings.csv', usecols=['userId', 'movieId', 'rating'])
movies = pd.read_csv('movies.csv', usecols=['movieId', 'title'])

# Step 0b: attach a title to every rating by joining on the shared movie ID.
movie_rating = pd.merge(movies, ratings, on='movieId')

# Step 1: build the user-by-movie rating matrix (one row per userId, one
# column per title) and use 0 as the placeholder for "not rated".
user_movie_rating = movie_rating.pivot_table(
    values="rating",
    index="userId",
    columns="title",
).fillna(0)

# Step 2: pairwise cosine similarity between the user rows. The result is a
# plain array whose row/column order follows user_movie_rating.index.
user_sim_matrix = cosine_similarity(user_movie_rating)

# Define a function to get the top-k most similar users to a given user
def get_top_similar_users(user_id, k=10):
    """Return the k users most similar to ``user_id``.

    Args:
        user_id: A userId label from the index of ``user_movie_rating``.
        k: Maximum number of neighbours to return.

    Returns:
        A list of ``(userId, similarity)`` tuples sorted by descending
        similarity. The given user is never included.

    Raises:
        KeyError: If ``user_id`` is not in ``user_movie_rating.index``.
    """
    user_ids = user_movie_rating.index
    # user_sim_matrix is a plain array ordered like the pivot-table rows, so
    # the userId label has to be translated into a row position first.
    row = user_ids.get_loc(user_id)
    # Pair every score with its userId label (not its position) so callers
    # can look the neighbour up with .loc, and drop the user explicitly
    # instead of assuming they sort first (ties at similarity 1.0 are possible).
    scored = [
        (other_id, score)
        for other_id, score in zip(user_ids, user_sim_matrix[row])
        if other_id != user_id
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:k]

# Define a function to generate recommendations for a given user
def recommend_movies(user_id, k=10):
    """Recommend up to k movies for ``user_id`` from similar users' ratings.

    A movie is a candidate when at least one of the k most similar users has
    rated it and ``user_id`` has not. Its score is the sum of the similarity
    scores of the neighbours who rated it.

    Args:
        user_id: A userId label from the index of ``user_movie_rating``.
        k: Number of neighbours to consult and maximum number of movies
            to return.

    Returns:
        A list of ``(title, score)`` tuples sorted by descending score.
    """
    # Get the top-k most similar users to the given user
    similar_users = get_top_similar_users(user_id, k)

    # Titles the given user has already rated (0 is the "not rated" filler).
    own_ratings = user_movie_rating.loc[user_id]
    already_rated = set(own_ratings[own_ratings != 0].index)

    recommendations = {}
    for neighbour_id, similarity in similar_users:
        neighbour_ratings = user_movie_rating.loc[neighbour_id]
        for title, rating in neighbour_ratings.items():
            # Only movies the neighbour actually rated can be recommended,
            # and only if the given user has not rated them yet.
            if rating == 0 or title in already_rated:
                continue
            # Accumulate the neighbour's similarity as the movie's score.
            recommendations[title] = recommendations.get(title, 0) + similarity

    # Sort by score in descending order and keep the top-k movies
    return sorted(recommendations.items(), key=lambda x: x[1], reverse=True)[:k]

# Preview the rating matrix: 10 randomly sampled users for a few column
# subsets (0 means the user has not rated that movie). Each preview is
# printed explicitly because a bare expression displays nothing when the
# file is run as a script.
print(user_movie_rating.iloc[:, 0:4].sample(10))
print(user_movie_rating[['Ice Age (2002)', 'Crimson Tide (1995)']].sample(10))
print(user_movie_rating[['Forrest Gump (1994)', 'Pulp Fiction (1994)', 'Fight Club (1999)']].sample(10))
This article is from the free online course:

Recommender Systems in Python

Created by
FutureLearn - Learning For Life

Reach your personal and professional goals

Unlock access to hundreds of expert online courses and degrees from top universities and educators to gain accredited qualifications and professional CV-building certificates.

Join over 18 million learners to launch, switch or build upon your career, all at your own pace, across a wide range of topic areas.

Start Learning now