Skip to content

Commit

Permalink
merge: branch 'etl_branch' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
fedepacher committed Jun 9, 2023
2 parents e024e00 + b1b1cc6 commit c3953ef
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 39 deletions.
38 changes: 37 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,37 @@
# RecomendationML
<p align=center><img src=_src/assets/movies_cover.png></p>

# <h1 align=center> **Movie Recommendation using Machine Learning** </h1>

## Introduction

A movie recommendation system, or a movie recommender system, is an ML-based approach to filtering or predicting the
users’ film preferences based on their past choices and behavior. It’s an advanced filtration mechanism that predicts
the possible movie choices of the concerned user and their preferences towards a domain-specific item, aka movie.

## About this project

This is a project for the **SoyHenry** academy.


## Workflow

This project is managed with a GitHub Project, which you can access at the following [link](https://github.com/users/fedepacher/projects/2).<br>
This project follows the Conventional Commits convention described at the following [link](https://github.com/fedepacher/RecomendationML/wiki/Conventional-Commits).<br>
Each task will be divided into an issue. [Here](https://github.com/fedepacher/RecomendationML/issues) you can find all the issues available at the moment.<br>
The scheduled task timetable can be accessed at the following [link](https://github.com/users/fedepacher/projects/2/views/2).<br><br>
The following image shows the workflow proposed for this project:<br>

<p align=center><img src=_src/assets/workflow.png></p>


## Data Engineering ETL

For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/1) was created, where you can find the description of the task to solve.

## API development

For this task, the following [issue](https://github.com/fedepacher/RecomendationML/issues/2) was created, where you can find the description of the task to solve.<br>
There is also a related [issue](https://github.com/fedepacher/RecomendationML/issues/3).
## Deployment

To deploy the environment, the following [tutorial](https://github.com/HX-FNegrete/render-fastapi-tutorial) was used. The related issue can be found [here](https://github.com/fedepacher/RecomendationML/issues/4).
Binary file added _src/assets/movies_cover.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added _src/assets/workflow.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
80 changes: 42 additions & 38 deletions movies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


class Movies():
"""Class that contains all the functions that the API needs."""

def __init__(self):
"""Constructor"""
Expand All @@ -26,12 +27,12 @@ def get_count_movies_month(self, month=''):
'septiembre': '9', 'setiembre': '9', 'octubre': '10',
'noviembre': '11', 'diciembre': '12'}

if month.lower() not in valid_months:
return {'message': f'Month not exists: {month}'}
else:
condition = self._df_movies['release_date'].dt.strftime('%m') == valid_months.get(month.lower())
return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on ' \
f'{month}'}
if month.lower() in valid_months:
variable = valid_months.get(month.lower())
condition = self._df_movies['release_date'].dt.strftime('%m') == variable
return {'month': month, 'amount': self._df_movies[condition]["title"].count()}

return {'message': f'Month not exists: {month}'}


def get_count_movies_day(self, day=''):
Expand All @@ -46,13 +47,11 @@ def get_count_movies_day(self, day=''):
valid_days = {'lunes': 0, 'martes': 1, 'miercoles': 2, 'jueves': 3,
'viernes': 4, 'sabado': 5, 'domingo': 6}

if day.lower() not in valid_days:
return {'message': f'Mes no existente: {day}'}
else:
variable = valid_days.get(day.lower())
if day.lower() in valid_days:
condition = self._df_movies['release_date'].dt.dayofweek == valid_days.get(day.lower())
return {'message': f'{self._df_movies[condition]["title"].count()} movies were released on '\
f'{day}'}
return {'day': day, 'amount': self._df_movies[condition]["title"].count()}

return {'message': f'Mes no existente: {day}'}


def get_score_title(self, title=''):
Expand All @@ -68,11 +67,11 @@ def get_score_title(self, title=''):
index = df_aux[df_aux == title.lower()].index
if len(index.values) > 0:
df_aux = self._df_movies.iloc[index][['title', 'release_year', 'popularity']]
return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \
f'{df_aux["release_year"].values[0]} with a popularity ' \
f'of {df_aux["popularity"].values[0].round(1)}'}
else:
return {'message': f'Movie `{title}` not found'}
return {'title': df_aux["title"].values[0],
'year': df_aux["release_year"].values[0],
'popularity': df_aux["popularity"].values[0].round(1)}

return {'message': f'Movie `{title}` not found'}


def get_votes_title(self, title=''):
Expand All @@ -87,15 +86,17 @@ def get_votes_title(self, title=''):
df_aux = self._df_movies['title'].str.lower()
index = df_aux[df_aux == title.lower()].index
if len(index.values) > 0:
df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count', 'vote_average']]
df_aux = self._df_movies.iloc[index][['title', 'release_year', 'vote_count',
'vote_average']]
if df_aux["vote_count"].values[0] >= 2000:
return {'message': f'The movie {df_aux["title"].values[0]} was released on ' \
f'{df_aux["release_year"].values[0]} with {int(df_aux["vote_count"].values[0])} ' \
f'votes and {df_aux["vote_average"].values[0].round(1)} of votes averages'}
else:
return {'message': f'Movie `{title}` has not enough votes'}
else:
return {'message': f'Movie `{title}` not found'}
return {'title': df_aux["title"].values[0],
'year': df_aux["release_year"].values[0],
'total_votes': int(df_aux["vote_count"].values[0]),
'average_votes': df_aux["vote_average"].values[0].round(1)}

return {'message': f'Movie `{title}` has not enough votes'}

return {'message': f'Movie `{title}` not found'}


def get_actor(self, actor=''):
Expand All @@ -121,11 +122,12 @@ def get_actor(self, actor=''):
return_list = [self._df_movies['return'].iloc[ret] for ret in index_list]
max_value = max(return_list)
index_max_return = index_list[return_list.index(max_value)]
return {'message': f'The actor {actor} has starred in {movies_count} films and has got '\
f'a maximun return of {self._df_movies.iloc[index_max_return]["return"].round(1)} '\
f'with an average of {ret_mean} per film.'}
else:
return {'message': f'Actor `{actor}` not found'}
return {'actor': actor,
'movie_count': movies_count,
'max_return': self._df_movies.iloc[index_max_return]["return"].round(1),
'average_return': ret_mean}

return {'message': f'Actor `{actor}` not found'}


def get_director(self, director=''):
Expand All @@ -148,11 +150,13 @@ def get_director(self, director=''):
max_value = max(ret_list)

index_var = index_list[ret_list.index(max_value)]
text = ''
for m, d, ret, rev, c in zip(m_list, d_list, ret_list, rev_list, c_list):
text += f'Movie: {m}, date: {d}, return: {ret.round(1)}, revenue: {rev}, cost: {c}\n'
return {'message': f'The success of {director} was `{self._df_movies.iloc[index_var]["title"]}` ' \
f'with a return of {self._df_movies.iloc[index_var]["return"].round(1)}.\n'\
f'The movie list is:\n{text}'}
else:
return {'message': f'Director `{director}` not found'}
return {'director': director,
'max_return_title': self._df_movies.iloc[index_var]["title"],
'max_return': self._df_movies.iloc[index_var]["return"].round(1),
'movies': m_list,
'year': d_list,
'return_movie': ret_list,
'budget_movie': c_list,
'revenue_movie': rev_list}

return {'message': f'Director `{director}` not found'}

0 comments on commit c3953ef

Please sign in to comment.