From b7cfc1cdb18e0b8d16130f97b9019423efc5b3f3 Mon Sep 17 00:00:00 2001 From: Felipe Allegretti Date: Wed, 1 Nov 2023 17:53:26 -0300 Subject: [PATCH] Docstrings improvement (#43) * Setup Interrogate check * Create docstrings --- .github/workflows/check.yml | 6 +- app/api/endpoints/clubs.py | 6 +- app/api/endpoints/competitions.py | 4 +- app/services/base.py | 103 +++++++++++++++++++++++++++ app/services/clubs/players.py | 31 +++++++- app/services/clubs/profile.py | 22 +++++- app/services/clubs/search.py | 27 ++++++- app/services/competitions/clubs.py | 27 ++++++- app/services/competitions/search.py | 28 +++++++- app/services/players/market_value.py | 29 +++++++- app/services/players/profile.py | 21 +++++- app/services/players/search.py | 27 ++++++- app/services/players/stats.py | 26 ++++++- app/services/players/transfers.py | 25 ++++++- app/utils/utils.py | 79 ++++++++++++++++++++ poetry.lock | 63 +++++++++++++++- pyproject.toml | 8 ++- 17 files changed, 510 insertions(+), 22 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 93e4e54..5d73a04 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -1,4 +1,4 @@ -name: Linter Check +name: Code Check on: push: branches: [ main ] @@ -19,3 +19,7 @@ jobs: run: | pip install black black --check . 
+ - name: interrogate + run: | + pip install interrogate + interrogate app/services -vv diff --git a/app/api/endpoints/clubs.py b/app/api/endpoints/clubs.py index 7f3645d..642a461 100644 --- a/app/api/endpoints/clubs.py +++ b/app/api/endpoints/clubs.py @@ -10,21 +10,21 @@ @router.get("/search/{club_name}") -def search_clubs(club_name: str, page_number: Optional[int] = 1): +def search_clubs(club_name: str, page_number: Optional[int] = 1) -> dict: tfmkt = TransfermarktClubSearch(query=club_name, page_number=page_number) found_clubs = tfmkt.search_clubs() return found_clubs @router.get("/{club_id}/profile") -def get_club_profile(club_id: str): +def get_club_profile(club_id: str) -> dict: tfmkt = TransfermarktClubProfile(club_id=club_id) club_profile = tfmkt.get_club_profile() return club_profile @router.get("/{club_id}/players") -def get_club_players(club_id: str, season_id: Optional[str] = None): +def get_club_players(club_id: str, season_id: Optional[str] = None) -> dict: tfmkt = TransfermarktClubPlayers(club_id=club_id, season_id=season_id) club_players = tfmkt.get_club_players() return club_players diff --git a/app/api/endpoints/competitions.py b/app/api/endpoints/competitions.py index 47634f0..abdd1fb 100644 --- a/app/api/endpoints/competitions.py +++ b/app/api/endpoints/competitions.py @@ -9,14 +9,14 @@ @router.get("/search/{competition_name}") -def search_competitions(competition_name: str, page_number: Optional[int] = 1): +def search_competitions(competition_name: str, page_number: Optional[int] = 1) -> dict: tfmkt = TransfermarktCompetitionSearch(query=competition_name, page_number=page_number) competitions = tfmkt.search_competitions() return competitions @router.get("/{competition_id}/clubs") -def get_competition_clubs(competition_id: str, season_id: Optional[str] = None): +def get_competition_clubs(competition_id: str, season_id: Optional[str] = None) -> dict: tfmkt = TransfermarktCompetitionClubs(competition_id=competition_id, season_id=season_id) 
competition_clubs = tfmkt.get_competition_clubs() return competition_clubs diff --git a/app/services/base.py b/app/services/base.py index 39f5cb9..b5cbd2a 100644 --- a/app/services/base.py +++ b/app/services/base.py @@ -14,11 +14,35 @@ @dataclass class TransfermarktBase: + """ + Base class for making HTTP requests to Transfermarkt and extracting data from the web pages. + + Args: + URL (str): The URL for the web page to be fetched. + Attributes: + page (ElementTree): The parsed web page content. + response (dict): A dictionary to store the response data. + """ + URL: str page: ElementTree = field(default_factory=lambda: None, init=False) response: dict = field(default_factory=lambda: {}, init=False) def make_request(self, url: Optional[str] = None) -> Response: + """ + Make an HTTP GET request to the specified URL. + + Args: + url (str, optional): The URL to make the request to. If not provided, the class's URL + attribute will be used. + + Returns: + Response: An HTTP Response object containing the server's response to the request. + + Raises: + HTTPException: If there are too many redirects, or if the server returns a client or + server error status code. + """ url = self.URL if not url else url try: response: Response = requests.get( @@ -47,22 +71,73 @@ def make_request(self, url: Optional[str] = None) -> Response: return response def request_url_bsoup(self) -> BeautifulSoup: + """ + Fetch the web page content and parse it using BeautifulSoup. + + Returns: + BeautifulSoup: A BeautifulSoup object representing the parsed web page content. + + Raises: + HTTPException: If there are too many redirects, or if the server returns a client or + server error status code. + """ response: Response = self.make_request() return BeautifulSoup(markup=response.content, features="html.parser") @staticmethod def convert_bsoup_to_page(bsoup: BeautifulSoup) -> ElementTree: + """ + Convert a BeautifulSoup object to an ElementTree. 
+ + Args: + bsoup (BeautifulSoup): The BeautifulSoup object representing the parsed web page content. + + Returns: + ElementTree: An ElementTree representing the parsed web page content for further processing. + """ return etree.HTML(str(bsoup)) def request_url_page(self) -> ElementTree: + """ + Fetch the web page content, parse it using BeautifulSoup, and convert it to an ElementTree. + + Returns: + ElementTree: An ElementTree representing the parsed web page content for further + processing. + + Raises: + HTTPException: If there are too many redirects, or if the server returns a client or + server error status code. + """ bsoup: BeautifulSoup = self.request_url_bsoup() return self.convert_bsoup_to_page(bsoup=bsoup) def raise_exception_if_not_found(self, xpath: str): + """ + Raise an exception if the specified XPath does not yield any results on the web page. + + Args: + xpath (str): The XPath expression to query elements on the page. + + Raises: + HTTPException: If the specified XPath query does not yield any results, indicating an invalid request. + """ if not self.get_text_by_xpath(xpath): raise HTTPException(status_code=404, detail=f"Invalid request (url: {self.URL})") def get_list_by_xpath(self, xpath: str, remove_empty: Optional[bool] = True) -> Optional[list]: + """ + Extract a list of elements from the web page using the specified XPath expression. + + Args: + xpath (str): The XPath expression to query elements on the page. + remove_empty (bool, optional): If True, remove empty or whitespace-only elements from + the list. Default is True. + + Returns: + Optional[list]: A list of elements extracted from the web page based on the XPath query. + If remove_empty is True, empty or whitespace-only elements are filtered out. 
+ """ elements: list = self.page.xpath(xpath) if remove_empty: elements_valid: list = [trim(e) for e in elements if trim(e)] @@ -79,6 +154,25 @@ def get_text_by_xpath( iloc_to: Optional[int] = None, join_str: Optional[str] = None, ) -> Optional[str]: + """ + Extract text content from the web page using the specified XPath expression. + + Args: + xpath (str): The XPath expression to query elements on the page. + pos (int, optional): Index of the element to extract if multiple elements match the + XPath. Default is 0. + iloc (int, optional): Extract a single element by index, used as an alternative to 'pos'. + iloc_from (int, optional): Extract a range of elements starting from the specified + index (inclusive). + iloc_to (int, optional): Extract a range of elements up to the specified + index (exclusive). + join_str (str, optional): If provided, join multiple text elements into a single string + using this separator. + + Returns: + Optional[str]: The extracted text content from the web page based on the XPath query and + optional parameters. If no matching element is found, None is returned. + """ element = self.page.xpath(xpath) if not element: @@ -108,6 +202,15 @@ def get_text_by_xpath( return None def get_search_last_page_number(self, xpath_base: str) -> int: + """ + Retrieve the last page number for search results based on the provided base XPath. + + Args: + xpath_base (str): The base XPath for extracting page number information. + + Returns: + int: The last page number for search results. Returns 1 if no page numbers are found. 
+ """ url_page_number_last = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_LAST) url_page_number_active = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_ACTIVE) diff --git a/app/services/clubs/players.py b/app/services/clubs/players.py index d43b918..6282291 100644 --- a/app/services/clubs/players.py +++ b/app/services/clubs/players.py @@ -9,11 +9,21 @@ @dataclass class TransfermarktClubPlayers(TransfermarktBase): + """ + A class for retrieving and parsing the players of a football club from Transfermarkt. + + Args: + club_id (str): The unique identifier of the football club. + season_id (str): The unique identifier of the season. + URL (str): The URL template for the club's players page on Transfermarkt. + """ + club_id: str = None season_id: str = None URL: str = "https://www.transfermarkt.com/-/kader/verein/{club_id}/saison_id/{season_id}/plus/1" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktClubPlayers class.""" self.URL = self.URL.format(club_id=self.club_id, season_id=self.season_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Clubs.Players.CLUB_NAME) @@ -21,13 +31,21 @@ def __post_init__(self): self.__update_past_flag() def __update_season_id(self): + """Update the season ID if it's not provided by extracting it from the website.""" if self.season_id is None: self.season_id = extract_from_url(self.get_text_by_xpath(Clubs.Players.CLUB_URL), "season_id") - def __update_past_flag(self): + def __update_past_flag(self) -> None: + """Check if the season is the current or if it's a past one and update the flag accordingly.""" self.past = "Current club" in self.get_list_by_xpath(Clubs.Players.PAST_FLAG) - def __parse_club_players(self) -> list: + def __parse_club_players(self) -> list[dict]: + """ + Parse player information from the webpage and return a list of dictionaries, each representing a player. 
+ + Returns: + list[dict]: A list of player information dictionaries. + """ page_nationalities = self.page.xpath(Clubs.Players.PAGE_NATIONALITIES) page_players_infos = self.page.xpath(Clubs.Players.PAGE_INFOS) page_players_signed_from = self.page.xpath( @@ -102,6 +120,13 @@ def __parse_club_players(self) -> list: ] def get_club_players(self) -> dict: + """ + Retrieve and parse player information for the specified football club. + + Returns: + dict: A dictionary containing the club's unique identifier, player information, and the timestamp of when + the data was last updated. + """ self.response["id"] = self.club_id self.response["players"] = self.__parse_club_players() self.response["updatedAt"] = datetime.now() diff --git a/app/services/clubs/profile.py b/app/services/clubs/profile.py index ca3edd0..9a63f6f 100644 --- a/app/services/clubs/profile.py +++ b/app/services/clubs/profile.py @@ -9,15 +9,33 @@ @dataclass class TransfermarktClubProfile(TransfermarktBase): + """ + A class for retrieving and parsing the profile information of a football club from Transfermarkt. + + Args: + club_id (str): The unique identifier of the football club. + URL (str): The URL template for the club's profile page on Transfermarkt. + """ + club_id: str = None URL: str = "https://www.transfermarkt.us/-/datenfakten/verein/{club_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktClubProfile class.""" self.URL = self.URL.format(club_id=self.club_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Clubs.Profile.URL) - def get_club_profile(self): + def get_club_profile(self) -> dict: + """ + Retrieve and parse the profile information of the football club from Transfermarkt. + + This method extracts various attributes of the club's profile, such as name, official name, address, contact + information, stadium details, and more. + + Returns: + dict: A dictionary containing the club's profile information. 
+ """ self.response["id"] = self.club_id self.response["url"] = self.get_text_by_xpath(Clubs.Profile.URL) self.response["name"] = self.get_text_by_xpath(Clubs.Profile.NAME) diff --git a/app/services/clubs/search.py b/app/services/clubs/search.py index a996501..d9d7c50 100644 --- a/app/services/clubs/search.py +++ b/app/services/clubs/search.py @@ -8,17 +8,35 @@ @dataclass class TransfermarktClubSearch(TransfermarktBase): + """ + A class for searching football clubs on Transfermarkt and retrieving search results. + + Args: + query (str): The search query for finding football clubs. + URL (str): The URL template for the search query. + page_number (int): The page number of search results (default is 1). + """ + query: str = None URL: str = ( "https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query={query}&Verein_page={page_number}" ) page_number: int = 1 - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktClubSearch class.""" self.URL = self.URL.format(query=self.query, page_number=self.page_number) self.page = self.request_url_page() def __parse_search_results(self) -> list: + """ + Parse the search results page and extract information about the found football clubs. + + Returns: + list: A list of dictionaries, where each dictionary contains information about a + football club found in the search results, including the club's unique identifier, + URL, name, country, squad size, and market value. + """ clubs_names = self.get_list_by_xpath(Clubs.Search.NAMES) clubs_urls = self.get_list_by_xpath(Clubs.Search.URLS) clubs_countries = self.get_list_by_xpath(Clubs.Search.COUNTRIES) @@ -46,6 +64,13 @@ def __parse_search_results(self) -> list: ] def search_clubs(self) -> dict: + """ + Perform a search for football clubs on Transfermarkt and retrieve search results. 
+ + Returns: + dict: A dictionary containing the search query, current page number, last page number, + search results, and the timestamp of when the search was conducted. + """ self.response["query"] = self.query self.response["pageNumber"] = self.page_number self.response["lastPageNumber"] = self.get_search_last_page_number(Clubs.Search.BASE) diff --git a/app/services/competitions/clubs.py b/app/services/competitions/clubs.py index 2ce5546..154dee0 100644 --- a/app/services/competitions/clubs.py +++ b/app/services/competitions/clubs.py @@ -8,16 +8,34 @@ @dataclass class TransfermarktCompetitionClubs(TransfermarktBase): + """ + A class for retrieving and parsing the list of football clubs in a specific competition on Transfermarkt. + + Args: + competition_id (str): The unique identifier of the competition. + season_id (str): The season identifier. If not provided, it will be extracted from the URL. + URL (str): The URL template for the competition's page on Transfermarkt. + """ + competition_id: str = None season_id: str = None URL: str = "https://www.transfermarkt.com/-/startseite/wettbewerb/{competition_id}/plus/?saison_id={season_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktCompetitionClubs class.""" self.URL = self.URL.format(competition_id=self.competition_id, season_id=self.season_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Competitions.Profile.NAME) def __parse_competition_clubs(self) -> list: + """ + Parse the competition's page and extract information about the football clubs participating + in the competition. + + Returns: + list: A list of dictionaries, where each dictionary contains information about a + football club in the competition, including the club's unique identifier and name. 
+ """ urls = self.get_list_by_xpath(Competitions.Clubs.URLS) names = self.get_list_by_xpath(Competitions.Clubs.NAMES) ids = [extract_from_url(url) for url in urls] @@ -25,6 +43,13 @@ def __parse_competition_clubs(self) -> list: return [{"id": idx, "name": name} for idx, name in zip(ids, names)] def get_competition_clubs(self) -> dict: + """ + Retrieve and parse the list of football clubs participating in a specific competition. + + Returns: + dict: A dictionary containing the competition's unique identifier, name, season identifier, list of clubs + participating in the competition, and the timestamp of when the data was last updated. + """ self.response["id"] = self.competition_id self.response["name"] = self.get_text_by_xpath(Competitions.Profile.NAME) self.response["seasonID"] = extract_from_url( diff --git a/app/services/competitions/search.py b/app/services/competitions/search.py index e2733ac..6b63f7c 100644 --- a/app/services/competitions/search.py +++ b/app/services/competitions/search.py @@ -8,17 +8,35 @@ @dataclass class TransfermarktCompetitionSearch(TransfermarktBase): + """ + A class for searching football competitions on Transfermarkt and retrieving search results. + + Args: + query (str): The search query for finding football competitions. + URL (str): The URL template for the search query. + page_number (int): The page number of search results (default is 1). + """ + query: str = None URL: str = ( "https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query={query}&Wettbewerb_page={page_number}" ) page_number: int = 1 - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktCompetitionSearch class.""" self.URL = self.URL.format(query=self.query, page_number=self.page_number) self.page = self.request_url_page() def __parse_search_results(self) -> list: + """ + Parse and retrieve the search results for football competitions from Transfermarkt. 
+ + Returns: + list: A list of dictionaries, each containing details of a football competition, + including its unique identifier, name, country, associated clubs, number of players, + total market value, mean market value, and continent. + """ idx = [extract_from_url(url) for url in self.get_list_by_xpath(Competitions.Search.URLS)] name = self.get_list_by_xpath(Competitions.Search.NAMES) country = self.get_list_by_xpath(Competitions.Search.COUNTRIES) @@ -51,7 +69,13 @@ def __parse_search_results(self) -> list: ) ] - def search_competitions(self): + def search_competitions(self) -> dict: + """ + Perform a search for football competitions and retrieve the search results. + + Returns: + dict: A dictionary containing search results, including competition details. + """ self.response["query"] = self.query self.response["pageNumber"] = self.page_number self.response["lastPageNumber"] = self.get_search_last_page_number(Competitions.Search.BASE) diff --git a/app/services/players/market_value.py b/app/services/players/market_value.py index 7b249e4..6668404 100644 --- a/app/services/players/market_value.py +++ b/app/services/players/market_value.py @@ -14,17 +14,37 @@ @dataclass class TransfermarktPlayerMarketValue(TransfermarktBase): + """ + Represents a service for retrieving and parsing the market value history of a football player on Transfermarkt. + + Args: + player_id (str): The unique identifier of the player. + + Attributes: + URL (str): The URL to fetch the player's market value data. + URL_MARKET_VALUE (str): The URL to fetch the player's market value history chart data. 
+ """ + player_id: str = None URL: str = "https://www.transfermarkt.com/-/marktwertverlauf/spieler/{player_id}" URL_MARKET_VALUE: str = "https://www.transfermarkt.com/ceapi/marketValueDevelopment/graph/{player_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktPlayerMarketValue class.""" self.URL = self.URL.format(player_id=self.player_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Players.Profile.NAME) self.market_value_chart = self.make_request(url=self.URL_MARKET_VALUE.format(player_id=self.player_id)) def __parse_market_value_history(self) -> list: + """ + Parse the market value history of a football player from the retrieved data. + + Returns: + list: A list of dictionaries, where each dictionary represents a data point in the + player's market value history. Each dictionary contains keys 'date', 'age', + 'clubID', 'clubName', and 'value' with their respective values. + """ data = json.loads(self.market_value_chart.content).get("list") club_image = None @@ -44,6 +64,13 @@ def __parse_market_value_history(self) -> list: ] def get_player_market_value(self) -> dict: + """ + Retrieve and parse the market value history of a football player. + + Returns: + dict: A dictionary containing the player's unique identifier, current market value, + market value history, ranking, and the timestamp of when the data was last updated. 
+ """ self.response["id"] = self.player_id self.response["marketValue"] = self.get_text_by_xpath(Players.MarketValue.CURRENT, join_str="") self.response["marketValueHistory"] = self.__parse_market_value_history() diff --git a/app/services/players/profile.py b/app/services/players/profile.py index 9d370b2..70692e8 100644 --- a/app/services/players/profile.py +++ b/app/services/players/profile.py @@ -11,15 +11,34 @@ @dataclass class TransfermarktPlayerProfile(TransfermarktBase): + """ + Represents a service for retrieving and parsing the profile information of a football player on Transfermarkt. + + Args: + player_id (str): The unique identifier of the player. + + Attributes: + URL (str): The URL to fetch the player's profile data. + """ + player_id: str = None URL: str = "https://www.transfermarkt.com/-/profil/spieler/{player_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktPlayerProfile class.""" self.URL = self.URL.format(player_id=self.player_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Players.Profile.URL) def get_player_profile(self) -> dict: + """ + Retrieve and parse the player's profile information, including their personal details, + club affiliations, market value, agent information, social media links, and more. + + Returns: + dict: A dictionary containing the player's unique identifier, profile information, and the timestamp of when + the data was last updated. 
+ """ self.response["id"] = self.get_text_by_xpath(Players.Profile.ID) self.response["url"] = self.get_text_by_xpath(Players.Profile.URL) self.response["name"] = self.get_text_by_xpath(Players.Profile.NAME) diff --git a/app/services/players/search.py b/app/services/players/search.py index b8cc0f1..52993d3 100644 --- a/app/services/players/search.py +++ b/app/services/players/search.py @@ -9,18 +9,35 @@ @dataclass class TransfermarktPlayerSearch(TransfermarktBase): + """ + A class for searching football players on Transfermarkt and retrieving search results. + + Args: + query (str): The search query for finding football players. + URL (str): The URL template for the search query. + page_number (int): The page number of search results (default is 1). + """ + query: str = None URL: str = ( "https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query={query}&Spieler_page={page_number}" ) page_number: int = 1 - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktPlayerSearch class.""" self.URL = self.URL.format(query=self.query, page_number=self.page_number) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Players.Search.FOUND) def __parse_search_results(self) -> list: + """ + Parse and return a list of player search results. Each result includes player information such as their unique + identifier, name, position, club (including ID and name), age, nationality, and market value. + + Returns: + list: A list of dictionaries, with each dictionary representing a player search result. + """ idx = [extract_from_url(url) for url in self.get_list_by_xpath(Players.Search.URL)] name = self.get_list_by_xpath(Players.Search.NAME) position = self.get_list_by_xpath(Players.Search.POSITION) @@ -58,6 +75,14 @@ def __parse_search_results(self) -> list: ] def search_players(self) -> dict: + """ + Retrieve and parse the search results for players matching the specified query. 
The results + include player information such as their name, position, club, age, nationality, and market value. + + Returns: + dict: A dictionary containing the search query, page number, last page number, search + results, and the timestamp of when the data was last updated. + """ self.response["query"] = self.query self.response["pageNumber"] = self.page_number self.response["lastPageNumber"] = self.get_search_last_page_number(Players.Search.BASE) diff --git a/app/services/players/stats.py b/app/services/players/stats.py index 73bf7a8..dfbec9c 100644 --- a/app/services/players/stats.py +++ b/app/services/players/stats.py @@ -13,15 +13,32 @@ @dataclass class TransfermarktPlayerStats(TransfermarktBase): + """ + A class for retrieving and parsing a player's stats from Transfermarkt. + + Args: + player_id (str): The unique identifier of the player. + URL (str): The URL template for the player's stats page on Transfermarkt. + """ + player_id: str = None URL: str = "https://www.transfermarkt.com/-/leistungsdatendetails/spieler/{player_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktPlayerStats class.""" self.URL = self.URL.format(player_id=self.player_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Players.Profile.URL) def __parse_player_stats(self) -> list: + """ + Parse and extract player statistics data from the Transfermarkt player stats page. + + Returns: + list: A list of dictionaries where each dictionary represents the statistics for a specific competition. + Each dictionary includes keys for competition ID, club ID, season ID, competition name, and various + statistical values for the player. 
+ """ rows = self.page.xpath(Players.Stats.ROWS) headers = to_camel_case( ["Competition id", "Club id", "Season id", "Competition name"] @@ -43,6 +60,13 @@ def __parse_player_stats(self) -> list: return [zip_lists_into_dict(headers, stat) for stat in data] def get_player_stats(self) -> dict: + """ + Retrieve and parse player statistics data for the specified player from Transfermarkt. + + Returns: + dict: A dictionary containing the player's unique identifier, parsed player statistics, and the timestamp of + when the data was last updated. + """ self.response["id"] = self.player_id self.response["stats"] = self.__parse_player_stats() self.response["updatedAt"] = datetime.now() diff --git a/app/services/players/transfers.py b/app/services/players/transfers.py index 690c3c4..a488fb1 100644 --- a/app/services/players/transfers.py +++ b/app/services/players/transfers.py @@ -12,15 +12,31 @@ @dataclass class TransfermarktPlayerTransfers(TransfermarktBase): + """ + A class for retrieving and parsing the player's transfer history and youth club details from Transfermarkt. + + Args: + player_id (str): The unique identifier of the player. + URL (str): The URL template for the player's transfers page on Transfermarkt. + """ + player_id: str = None URL: str = "https://www.transfermarkt.com/-/transfers/spieler/{player_id}" - def __post_init__(self): + def __post_init__(self) -> None: + """Initialize the TransfermarktPlayerTransfers class.""" self.URL = self.URL.format(player_id=self.player_id) self.page = self.request_url_page() self.raise_exception_if_not_found(xpath=Players.Profile.NAME) def __parse_player_transfers_history(self) -> list: + """ + Parse and retrieve the transfer history of the specified player from Transfermarkt. + + Returns: + list: A list of dictionaries containing details of player transfers, including season, date, old club, + new club, market value, and transfer fee. 
+ """ urls = self.get_list_by_xpath(Players.Transfers.TRANSFERS_URLS) seasons = self.get_list_by_xpath(Players.Transfers.SEASONS) dates = self.get_list_by_xpath(Players.Transfers.DATES) @@ -61,6 +77,13 @@ def __parse_player_transfers_history(self) -> list: ] def get_player_transfers(self) -> dict: + """ + Retrieve and parse the transfer history and youth clubs of the specified player from Transfermarkt. + + Returns: + dict: A dictionary containing the player's unique identifier, parsed transfer history, youth clubs, + and the timestamp of when the data was last updated. + """ self.response["id"] = self.player_id self.response["transfers"] = self.__parse_player_transfers_history() self.response["youthClubs"] = safe_split(self.get_text_by_xpath(Players.Transfers.YOUTH_CLUBS), ",") diff --git a/app/utils/utils.py b/app/utils/utils.py index cab63e3..8a52cd9 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -3,6 +3,15 @@ def clean_response(response: Union[dict, list]) -> Union[dict, list]: + """ + Recursively clean a dictionary or list by removing empty values and certain placeholders. + + Args: + response (Union[dict, list]): The dictionary or list to be cleaned. + + Returns: + Union[dict, list]: The cleaned dictionary or list. + """ if isinstance(response, dict): return { k: v @@ -15,10 +24,30 @@ def clean_response(response: Union[dict, list]) -> Union[dict, list]: def zip_lists_into_dict(list_keys: list, list_values: list) -> dict: + """ + Create a dictionary by pairing elements from two lists. + + Args: + list_keys (list): List of keys. + list_values (list): List of values. + + Returns: + dict: A dictionary created by pairing elements from the input lists. + """ return {k: v for k, v in zip(list_keys, list_values)} def extract_from_url(tfmkt_url: str, element: str = "id") -> Optional[str]: + """ + Extract a specific element from a Transfermarkt URL using regular expressions. 
+ + Args: + tfmkt_url (str): The Transfermarkt URL from which to extract the element. + element (str, optional): The element to extract (e.g., 'id', 'season_id', 'transfer_id'). + + Returns: + Optional[str]: The extracted element value or None if not found. + """ regex: str = ( r"/(?P[\w%-]+)" r"/(?P[\w-]+)" @@ -35,6 +64,15 @@ def extract_from_url(tfmkt_url: str, element: str = "id") -> Optional[str]: def trim(text: Union[list, str]) -> str: + """ + Trim and clean up text by removing leading and trailing whitespace and special characters. + + Args: + text (Union[list, str]): The text or list of text to be trimmed. + + Returns: + str: The trimmed and cleaned text. + """ if isinstance(text, list): text = "".join(text) @@ -42,6 +80,17 @@ def trim(text: Union[list, str]) -> str: def safe_regex(text: Optional[str], regex, group: str) -> Optional[str]: + """ + Safely apply a regular expression and extract a specific group from the matched text. + + Args: + text (Optional[str]): The text to apply the regular expression to. + regex: The regular expression pattern. + group (str): The name of the group to extract. + + Returns: + Optional[str]: The extracted group value or None if not found or if the input is not a string. + """ if not isinstance(text, str): return None @@ -53,6 +102,17 @@ def safe_regex(text: Optional[str], regex, group: str) -> Optional[str]: def remove_str(text: Optional[str], strings_to_remove: Union[str, list]) -> Optional[str]: + """ + Remove specified strings from a text and return the cleaned text. + + Args: + text (Optional[str]): The text to remove strings from. + strings_to_remove (Union[str, list]): A string or list of strings to remove. + + Returns: + Optional[str]: The cleaned text with specified strings removed or None if not found or if + the input is not a string. 
+ """ if not isinstance(text, str): return None @@ -65,6 +125,16 @@ def remove_str(text: Optional[str], strings_to_remove: Union[str, list]) -> Opti def safe_split(text: Optional[str], delimiter: str) -> Optional[list]: + """ + Split a text using a delimiter and return a list of cleaned, trimmed values. + + Args: + text (Optional[str]): The text to split. + delimiter (str): The delimiter used for splitting. + + Returns: + Optional[list]: A list of split and cleaned values or None if the input is not a string. + """ if not isinstance(text, str): return None @@ -72,6 +142,15 @@ def safe_split(text: Optional[str], delimiter: str) -> Optional[list]: def to_camel_case(headers: list) -> list: + """ + Convert a list of headers to camelCase format. + + Args: + headers (list): A list of space-separated headers (e.g. 'Season id'); words are split on whitespace only. + + Returns: + list: A list of headers in camelCase format with special handling for 'Id' to 'ID'. + """ camel_case_headers = ["".join(word.capitalize() for word in header.split()) for header in headers] camel_case_headers = [header[0].lower() + header[1:] for header in camel_case_headers] diff --git a/poetry.lock b/poetry.lock index e847451..c33add9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -800,6 +800,31 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "interrogate" +version = "1.5.0" +description = "Interrogate a codebase for docstring coverage." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "interrogate-1.5.0-py3-none-any.whl", hash = "sha256:a4ccc5cbd727c74acc98dee6f5e79ef264c0bcfa66b68d4e123069b2af89091a"}, + {file = "interrogate-1.5.0.tar.gz", hash = "sha256:b6f325f0aa84ac3ac6779d8708264d366102226c5af7d69058cecffcff7a6d6c"}, +] + +[package.dependencies] +attrs = "*" +click = ">=7.1" +colorama = "*" +py = "*" +tabulate = "*" +toml = "*" + +[package.extras] +dev = ["cairosvg", "pre-commit", "pytest", "pytest-cov", "pytest-mock", "sphinx", "sphinx-autobuild", "wheel"] +docs = ["sphinx", "sphinx-autobuild"] +png = ["cairosvg"] +tests = ["pytest", "pytest-cov", "pytest-mock"] + [[package]] name = "ipykernel" version = "6.26.0" @@ -1824,6 +1849,17 @@ files = [ [package.extras] tests = ["pytest"] +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] + [[package]] name = "pycparser" version = "2.21" @@ -2621,6 +2657,20 @@ typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\"" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + [[package]] name = "terminado" version 
= "0.17.1" @@ -2659,6 +2709,17 @@ webencodings = ">=0.4" doc = ["sphinx", "sphinx_rtd_theme"] test = ["flake8", "isort", "pytest"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -3078,4 +3139,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "cded2a39a481e21a18fccd57d0d20a29ae49f4256d94aca426d677cbf202d2aa" +content-hash = "f27a5c6a06c7b3457a246496257330c175724129dcd96005862abc832d9a8f20" diff --git a/pyproject.toml b/pyproject.toml index 393042b..fe791d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "transfermarkt-api" -version = "2.0.0" +version = "2.0.1" description = "API service to get data from Transfermarkt" authors = ["Felipe Allegretti "] license = "MIT" @@ -21,6 +21,7 @@ setuptools = "==68.2.2" [tool.poetry.group.check.dependencies] black = "==23.1.0" ruff = "==0.0.275" +interrogate = "==1.5.0" [tool.poetry.group.tests.dependencies] pytest = "==7.3.1" @@ -49,3 +50,8 @@ addopts = "--cov --cov-report term-missing --exitfirst" testpaths = [ "tests", ] + +[tool.interrogate] +ignore-init-module = true +ignore-module = true +fail-under = 100 \ No newline at end of file