-
Notifications
You must be signed in to change notification settings - Fork 0
/
recommendation_system.py
50 lines (44 loc) · 2.51 KB
/
recommendation_system.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class Recommendation():
    """Content-based movie recommender using TF-IDF text similarity.

    The movie catalogue is loaded from a CSV file, a text feature is built
    for every movie from its genres, rating and overview, and a TF-IDF
    matrix is fitted on that text. Recommendations are the movies whose
    TF-IDF vectors have the highest cosine similarity to the requested one.
    """

    def __init__(self):
        """Load the movie catalogue and fit the TF-IDF model on it."""
        url = 'https://drive.google.com/file/d/1cW_pujQGUR2WqwbqiJU3knd8KNlh_z5p/view?usp=sharing'
        # Rewrite the sharing link into a direct-download link (uses the file id).
        url = 'https://drive.google.com/uc?id=' + url.split('/')[-2]
        self._df_movies = pd.read_csv(url)
        # Only the text columns are blanked; 'vote_average' stays numeric so
        # it can still be sorted on in recommendation().
        for feature in ('genres', 'overview'):
            self._df_movies[feature] = self._df_movies[feature].fillna('')
        self._df_movies['combined_features'] = self._combine_features(self._df_movies)
        self._tfidf = TfidfVectorizer()
        self._tfidf_matrix = self._tfidf.fit_transform(self._df_movies['combined_features'])

    @staticmethod
    def _combine_features(df_movies):
        """Build the per-movie text '<genres> <vote_average> <overview>'.

        Args:
            df_movies (pandas.DataFrame): Frame with 'genres', 'vote_average'
                and 'overview' columns. It is not modified.

        Returns:
            pandas.Series: One lower-cased text per row; missing values
            contribute an empty string.
        """
        genres = df_movies['genres'].fillna('').str.lower()
        overview = df_movies['overview'].fillna('').str.lower()
        # Convert each rating individually. Calling str() on the whole Series
        # would append the same Series repr to every row.
        votes = df_movies['vote_average'].map(
            lambda vote: '' if pd.isna(vote) else str(vote)
        )
        return genres + ' ' + votes + ' ' + overview

    @staticmethod
    def _format_recommendations(candidates, title):
        """Turn candidate movies into the text returned to the caller.

        Args:
            candidates (pandas.DataFrame): Candidate movies with 'title' and
                'vote_average' columns.
            title (str): Lower-cased title of the movie that was asked for.

        Returns:
            str: One 'title rating' line per movie, each terminated by CRLF,
            ordered by rating from highest to lowest.
        """
        ranked = candidates.sort_values(by='vote_average', ascending=False)
        is_other = (ranked['title'].str.lower() != title).to_numpy()
        if is_other.all():
            # The requested movie is not among the candidates: drop the
            # lowest-rated candidate instead so the list size stays the same.
            ranked = ranked.iloc[:-1]
        else:
            # Never recommend the movie that was asked for.
            ranked = ranked[is_other]
        lines = []
        for name, vote in zip(ranked['title'], ranked['vote_average']):
            vote_text = '' if pd.isna(vote) else str(vote)
            lines.append(f'{name} {vote_text}\r\n')
        return ''.join(lines)

    def recommendation(self, title=''):
        """Get movie recommendations based on a movie title.

        Args:
            title (str): Movie title; matched case-insensitively.

        Returns:
            dict: A dictionary with the single key 'lista recomendada'. Its
            value is the recommended movies as 'title rating' lines, or a
            not-found message when no movie has that title.
        """
        title = title.lower()
        is_match = (self._df_movies['title'].str.lower() == title).to_numpy()
        # Row positions (not index labels) are what address the TF-IDF matrix.
        positions = is_match.nonzero()[0]
        if positions.size == 0:
            return {'lista recomendada': f'No title were found for {title}'}

        # When several movies share the title, the first one is the reference.
        similarity_scores = cosine_similarity(self._tfidf_matrix[positions[:1]],
                                              self._tfidf_matrix)
        # Six best matches, most similar first; one extra because the movie
        # itself is normally its own best match and gets removed afterwards.
        top_positions = similarity_scores.argsort()[0][-6:][::-1]
        candidates = self._df_movies.iloc[top_positions][['title', 'vote_average']]
        return {'lista recomendada': self._format_recommendations(candidates, title)}