merge: branch 'etl_branch' into main

fedepacher · Jun 9, 2023 · c3953ef · c3953ef
2 parents e024e00 + b1b1cc6
commit c3953ef
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 39 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,37 @@
-# RecomendationML
+<p align=center><img src=_src/assets/movies_cover.png></p>
+
+# <h1 align=center> **Movie Recommendation using Machine Learning** </h1>
+
+## Introduction
+
+A movie recommendation system, or a movie recommender system, is an ML-based approach to filtering or predicting the
+users’ film preferences based on their past choices and behavior. It’s an advanced filtration mechanism that predicts
+the possible movie choices of the concerned user and their preferences towards a domain-specific item, aka movie.
+
+## About this project
+
+This is a project for the **SoyHenry** academy.
+
+
+## Workflow
+
+This project is managed with GitHub Projects; you can access the project board at the following [link](https://github.com/users/fedepacher/projects/2).<br>
+This project follows the Conventional Commits convention described at the following [link](https://github.com/fedepacher/RecomendationML/wiki/Conventional-Commits).<br>
+Each task will be divided into an issue. [Here](https://github.com/fedepacher/RecomendationML/issues) you can find all the issues available at the moment.<br>
+The scheduled task timetable can be accessed at the following [link](https://github.com/users/fedepacher/projects/2/views/2).<br><br>
+The following image shows the workflow proposed for this project:<br>
+
+<p align=center><img src=_src/assets/workflow.png></p>
+
+
+## Data Engineering ETL
+
+For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/1) was created, where you can find the description of the task to solve.
+
+## API development
+
+For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/2) was created, where you can find the description of the task to solve.<br>
+It also has the following related [issue](https://github.com/fedepacher/RecomendationML/issues/3).
+## Deployment
+
+To deploy the environment, the following [tutorial](https://github.com/HX-FNegrete/render-fastapi-tutorial) was used. The related issue can be found [here](https://github.com/fedepacher/RecomendationML/issues/4).
diff --git a/_src/assets/movies_cover.png b/_src/assets/movies_cover.png
diff --git a/_src/assets/workflow.png b/_src/assets/workflow.png
diff --git a/movies.py b/movies.py
@@ -2,6 +2,7 @@
 
 
 class Movies():
+    """Class that contains all the functions that the API needs."""
 
     def __init__(self):
         """Constructor"""
@@ -26,12 +27,12 @@ def get_count_movies_month(self, month=''):
                         'septiembre': '9', 'setiembre': '9', 'octubre': '10',
                         'noviembre': '11', 'diciembre': '12'}
 
-        if month.lower() not in valid_months:
-            return {'message': f'Month not exists: {month}'}
-        else:
-            condition = self._df_movies['release_date'].dt.strftime('%m') == valid_months.get(month.lower())
-            return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on ' \
-                               f'{month}'}
+        if month.lower() in valid_months:
+            variable = valid_months.get(month.lower())
+            condition = self._df_movies['release_date'].dt.strftime('%m') == variable
+            return {'month': month, 'amount': self._df_movies[condition]["title"].count()}
+
+        return {'message': f'Month not exists: {month}'}
 
 
     def get_count_movies_day(self, day=''):
@@ -46,13 +47,11 @@ def get_count_movies_day(self, day=''):
         valid_days = {'lunes': 0, 'martes': 1, 'miercoles': 2, 'jueves': 3,
                       'viernes': 4, 'sabado': 5, 'domingo': 6}
 
-        if day.lower() not in valid_days:
-            return {'message': f'Mes no existente: {day}'}
-        else:
-            variable = valid_days.get(day.lower())
+        if day.lower() in valid_days:
             condition = self._df_movies['release_date'].dt.dayofweek == valid_days.get(day.lower())
-            return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on '\
-                               f'{day}'}
+            return {'day': day, 'amount': self._df_movies[condition]["title"].count()}
+
+        return {'message': f'Mes no existente: {day}'}
 
 
     def get_score_title(self, title=''):
@@ -68,11 +67,11 @@ def get_score_title(self, title=''):
         index = df_aux[df_aux == title.lower()].index
         if len(index.values) > 0:
             df_aux = self._df_movies.iloc[index][['title', 'release_year', 'popularity']]
-            return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \
-                    f'{df_aux["release_year"].values[0]} with a popularity ' \
-                    f'of {df_aux["popularity"].values[0].round(1)}'}
-        else:
-            return {'message': f'Movie `{title}` not found'}
+            return {'title': df_aux["title"].values[0],
+                    'year': df_aux["release_year"].values[0],
+                    'popularity': df_aux["popularity"].values[0].round(1)}
+
+        return {'message': f'Movie `{title}` not found'}
 
 
     def get_votes_title(self, title=''):
@@ -87,15 +86,17 @@ def get_votes_title(self, title=''):
         df_aux = self._df_movies['title'].str.lower()
         index = df_aux[df_aux == title.lower()].index
         if len(index.values) > 0:
-            df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count', 'vote_average']]
+            df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count',
+                                                  'vote_average']]
             if df_aux["vote_count"].values[0] >= 2000:
-                return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \
-                        f'{df_aux["release_year"].values[0]} with {int(df_aux["vote_count"].values[0])} ' \
-                        f'votes and {df_aux["vote_average"].values[0].round(1)} of votes averages'}
-            else:
-                return {'message': f'Movie `{title}` has not enough votes'}
-        else:
-            return {'message': f'Movie `{title}` not found'}
+                return {'title': df_aux["title"].values[0],
+                        'year': df_aux["release_year"].values[0],
+                        'total_votes': int(df_aux["vote_count"].values[0]),
+                        'average_votes': df_aux["vote_average"].values[0].round(1)}
+
+            return {'message': f'Movie `{title}` has not enough votes'}
+
+        return {'message': f'Movie `{title}` not found'}
 
 
     def get_actor(self, actor=''):
@@ -121,11 +122,12 @@ def get_actor(self, actor=''):
             return_list = [self._df_movies['return'].iloc[ret] for ret in index_list]
             max_value = max(return_list)
             index_max_return = index_list[return_list.index(max_value)]
-            return {'message': f'The actor {actor} has starred in {movies_count} films and has got '\
-                               f'a maximun return of {self._df_movies.iloc[index_max_return]["return"].round(1)} '\
-                               f'with an average of {ret_mean} per film.'}
-        else:
-            return {'message': f'Actor `{actor}` not found'}
+            return {'actor': actor,
+                    'movie_count': movies_count,
+                    'max_return': self._df_movies.iloc[index_max_return]["return"].round(1),
+                    'average_return': ret_mean}
+
+        return {'message': f'Actor `{actor}` not found'}
 
 
     def get_director(self, director=''):
@@ -148,11 +150,13 @@ def get_director(self, director=''):
             max_value = max(ret_list)
 
             index_var = index_list[ret_list.index(max_value)]
-            text = ''
-            for m, d, ret, rev, c in zip(m_list, d_list, ret_list, rev_list, c_list):
-                text += f'Movie: {m}, date: {d}, return: {ret.round(1)}, revenue: {rev}, cost: {c}\n'
-            return {'message': f'The success of {director} was `{self._df_movies.iloc[index_var]["title"]}` ' \
-                               f'with a return of {self._df_movies.iloc[index_var]["return"].round(1)}.\n'\
-                               f'The movie list is:\n{text}'}
-        else:
-            return {'message': f'Director `{director}` not found'}
+            return {'director': director,
+                    'max_return_title': self._df_movies.iloc[index_var]["title"],
+                    'max_return': self._df_movies.iloc[index_var]["return"].round(1),
+                    'movies': m_list,
+                    'year': d_list,
+                    'return_movie': ret_list,
+                    'budget_movie': c_list,
+                    'revenue_movie': rev_list}
+
+        return {'message': f'Director `{director}` not found'}