diff --git a/README.md b/README.md index ba28763..478c30c 100644 --- a/README.md +++ b/README.md @@ -1 +1,37 @@ -# RecomendationML \ No newline at end of file +

+ +#

**Movie Recommendation using Machine Learning**

+ +## Introduction + +A movie recommendation system, or a movie recommender system, is an ML-based approach to filtering or predicting the +users’ film preferences based on their past choices and behavior. It’s an advanced filtration mechanism that predicts +the possible movie choices of the concerned user and their preferences towards a domain-specific item, i.e., a movie. + +## About this project + +This is a project for the **SoyHenry** academy. + + +## Workflow + +This project is managed with a GitHub Project, which you can access at the following [link](https://github.com/users/fedepacher/projects/2).
+This project follows the Conventional Commits guidelines described at the following [link](https://github.com/fedepacher/RecomendationML/wiki/Conventional-Commits).
+Each task will be tracked as its own issue. [Here](https://github.com/fedepacher/RecomendationML/issues) you can find all the issues available at the moment.
+The scheduled task timetable can be accessed at the following [link](https://github.com/users/fedepacher/projects/2/views/2).

+The following image shows the workflow proposed for this project:
+ +

+ + +## Data Engineering ETL + +For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/1) has been created, where you can find the description of the task to solve. + +## API development + +For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/2) has been created, where you can find the description of the task to solve.
+It also has the following related [issue](https://github.com/fedepacher/RecomendationML/issues/3). +## Deployment + +To deploy the environment, the following [tutorial](https://github.com/HX-FNegrete/render-fastapi-tutorial) was used. The related issue can be found [here](https://github.com/fedepacher/RecomendationML/issues/4). diff --git a/_src/assets/movies_cover.png b/_src/assets/movies_cover.png new file mode 100644 index 0000000..6b1982e Binary files /dev/null and b/_src/assets/movies_cover.png differ diff --git a/_src/assets/workflow.png b/_src/assets/workflow.png new file mode 100644 index 0000000..3f8a269 Binary files /dev/null and b/_src/assets/workflow.png differ diff --git a/movies.py b/movies.py index 411a049..5907ef9 100644 --- a/movies.py +++ b/movies.py @@ -2,6 +2,7 @@ class Movies(): + """Class that contains all the functions that the API needs.""" def __init__(self): """Constructor""" @@ -26,12 +27,12 @@ def get_count_movies_month(self, month=''): 'septiembre': '9', 'setiembre': '9', 'octubre': '10', 'noviembre': '11', 'diciembre': '12'} - if month.lower() not in valid_months: - return {'message': f'Month not exists: {month}'} - else: - condition = self._df_movies['release_date'].dt.strftime('%m') == valid_months.get(month.lower()) - return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on ' \ - f'{month}'} + if month.lower() in valid_months: + variable = valid_months.get(month.lower()) + condition = self._df_movies['release_date'].dt.strftime('%m') == variable + return {'month': month, 'amount': self._df_movies[condition]["title"].count()} + + return {'message': f'Month not exists: {month}'} def get_count_movies_day(self, day=''): @@ -46,13 +47,11 @@ def get_count_movies_day(self, day=''): valid_days = {'lunes': 0, 'martes': 1, 'miercoles': 2, 'jueves': 3, 'viernes': 4, 'sabado': 5, 'domingo': 6} - if day.lower() not in valid_days: - return {'message': f'Mes no existente: {day}'} - else: - variable = 
valid_days.get(day.lower()) + if day.lower() in valid_days: condition = self._df_movies['release_date'].dt.dayofweek == valid_days.get(day.lower()) - return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on '\ - f'{day}'} + return {'day': day, 'amount': self._df_movies[condition]["title"].count()} + + return {'message': f'Mes no existente: {day}'} def get_score_title(self, title=''): @@ -68,11 +67,11 @@ def get_score_title(self, title=''): index = df_aux[df_aux == title.lower()].index if len(index.values) > 0: df_aux = self._df_movies.iloc[index][['title', 'release_year', 'popularity']] - return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \ - f'{df_aux["release_year"].values[0]} with a popularity ' \ - f'of {df_aux["popularity"].values[0].round(1)}'} - else: - return {'message': f'Movie `{title}` not found'} + return {'title': df_aux["title"].values[0], + 'year': df_aux["release_year"].values[0], + 'popularity': df_aux["popularity"].values[0].round(1)} + + return {'message': f'Movie `{title}` not found'} def get_votes_title(self, title=''): @@ -87,15 +86,17 @@ def get_votes_title(self, title=''): df_aux = self._df_movies['title'].str.lower() index = df_aux[df_aux == title.lower()].index if len(index.values) > 0: - df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count', 'vote_average']] + df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count', + 'vote_average']] if df_aux["vote_count"].values[0] >= 2000: - return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \ - f'{df_aux["release_year"].values[0]} with {int(df_aux["vote_count"].values[0])} ' \ - f'votes and {df_aux["vote_average"].values[0].round(1)} of votes averages'} - else: - return {'message': f'Movie `{title}` has not enough votes'} - else: - return {'message': f'Movie `{title}` not found'} + return {'title': df_aux["title"].values[0], + 'year': df_aux["release_year"].values[0], + 
'total_votes': int(df_aux["vote_count"].values[0]), + 'average_votes': df_aux["vote_average"].values[0].round(1)} + + return {'message': f'Movie `{title}` has not enough votes'} + + return {'message': f'Movie `{title}` not found'} def get_actor(self, actor=''): @@ -121,11 +122,12 @@ def get_actor(self, actor=''): return_list = [self._df_movies['return'].iloc[ret] for ret in index_list] max_value = max(return_list) index_max_return = index_list[return_list.index(max_value)] - return {'message': f'The actor {actor} has starred in {movies_count} films and has got '\ - f'a maximun return of {self._df_movies.iloc[index_max_return]["return"].round(1)} '\ - f'with an average of {ret_mean} per film.'} - else: - return {'message': f'Actor `{actor}` not found'} + return {'actor': actor, + 'movie_count': movies_count, + 'max_return': self._df_movies.iloc[index_max_return]["return"].round(1), + 'average_return': ret_mean} + + return {'message': f'Actor `{actor}` not found'} def get_director(self, director=''): @@ -148,11 +150,13 @@ def get_director(self, director=''): max_value = max(ret_list) index_var = index_list[ret_list.index(max_value)] - text = '' - for m, d, ret, rev, c in zip(m_list, d_list, ret_list, rev_list, c_list): - text += f'Movie: {m}, date: {d}, return: {ret.round(1)}, revenue: {rev}, cost: {c}\n' - return {'message': f'The success of {director} was `{self._df_movies.iloc[index_var]["title"]}` ' \ - f'with a return of {self._df_movies.iloc[index_var]["return"].round(1)}.\n'\ - f'The movie list is:\n{text}'} - else: - return {'message': f'Director `{director}` not found'} + return {'director': director, + 'max_return_title': self._df_movies.iloc[index_var]["title"], + 'max_return': self._df_movies.iloc[index_var]["return"].round(1), + 'movies': m_list, + 'year': d_list, + 'return_movie': ret_list, + 'budget_movie': c_list, + 'revenue_movie': rev_list} + + return {'message': f'Director `{director}` not found'}