From 55f88e28a5dc31724bd973e82d580ad43ea4ef59 Mon Sep 17 00:00:00 2001
From: Felipe Allegretti
Date: Tue, 19 Sep 2023 15:23:56 -0300
Subject: [PATCH] Implement goalkeeper parser

---
 app/services/players/stats.py | 77 ++++++++++++++++++++++++++++++++++-
 app/utils/xpath.py            |  1 +
 2 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/app/services/players/stats.py b/app/services/players/stats.py
index fa95457..d7971c5 100644
--- a/app/services/players/stats.py
+++ b/app/services/players/stats.py
@@ -8,6 +8,7 @@
     clean_response,
     extract_from_url,
     get_text_by_xpath,
+    remove_str,
     request_url_page,
     trim,
 )
@@ -22,8 +23,19 @@ class TransfermarktPlayerStats:
     def __post_init__(self):
         self._request_player_page()
         self._check_player_found()
+        self._check_player_goalkeeper()
 
     def get_player_stats(self) -> dict:
+        # NOTE(review): parse_player_stats_field() still ends with `return clean_response(self.player_stats)`,
+        # so for field players "stats" may end up holding the whole payload instead of the rows -- confirm.
+        self.player_stats["id"] = self.player_id
+        self.player_stats["stats"] = (
+            self.parse_player_stats_goalkeeper() if self.goalkeeper else self.parse_player_stats_field()
+        )
+        self.player_stats["lastUpdate"] = datetime.now()
+        return clean_response(self.player_stats)
+
+    def parse_player_stats_field(self) -> dict:
         stats: list[ElementTree] = self.page.xpath(Players.Stats.PLAYER_STATS)
 
         seasons = [elem.text for elem in stats[::9]]
@@ -40,7 +52,7 @@ def get_player_stats(self) -> dict:
         yellow_cards = [card[0] for card in cards]
         second_yellow_cards = [card[1] for card in cards]
         red_cards = [card[2] for card in cards]
-        minutes_played = [trim(elem.text) for elem in stats[8::9]]
+        minutes_played = [remove_str(trim(elem.text), ["'", "."]) for elem in stats[8::9]]
 
         self.player_stats["id"] = self.player_id
         self.player_stats["stats"] = [
@@ -77,6 +89,65 @@ def get_player_stats(self) -> dict:
         self.player_stats["lastUpdate"] = datetime.now()
         return clean_response(self.player_stats)
 
+    def parse_player_stats_goalkeeper(self) -> list[dict]:
+        """Build one stats dict per row of the goalkeeper performance table.
+
+        The cells matched by Players.Stats.PLAYER_STATS are read with a stride of 10 (the field-player
+        parser uses 9): besides the shared columns, goals conceded and clean sheets are extracted.
+        """
+        stats: list[ElementTree] = self.page.xpath(Players.Stats.PLAYER_STATS)
+
+        seasons = [elem.text for elem in stats[::10]]
+        competitions_ids = [
+            extract_from_url(get_text_by_xpath(elem, Players.Stats.COMPETITIONS_IDS)) for elem in stats[2::10]
+        ]
+        competitions_names = [get_text_by_xpath(elem, Players.Stats.COMPETITIONS_NAMES) for elem in stats[2::10]]
+        clubs_ids = [extract_from_url(get_text_by_xpath(elem, Players.Stats.CLUBS_IDS)) for elem in stats[3::10]]
+        clubs_names = [get_text_by_xpath(elem, Players.Stats.CLUBS_NAMES) for elem in stats[3::10]]
+        appearances = [get_text_by_xpath(elem, Players.Stats.APPEARANCES) for elem in stats[4::10]]
+
+        goals = [elem.text for elem in stats[5::10]]
+        cards = [trim(elem.text).split("/") for elem in stats[6::10]]
+        yellow_cards = [card[0] for card in cards]
+        second_yellow_cards = [card[1] for card in cards]
+        red_cards = [card[2] for card in cards]
+        goals_conceded = [elem.text for elem in stats[7::10]]
+        clean_sheets = [elem.text for elem in stats[8::10]]
+        minutes_played = [remove_str(trim(elem.text), ["'", "."]) for elem in stats[9::10]]
+
+        return [
+            {
+                "season": season,
+                "competitionId": competition_id,
+                "competitionName": competition_name,
+                "clubId": club_id,
+                "clubName": club_name,
+                "appearances": appearance,
+                "goals": goal,
+                "yellowCards": yellow_card,
+                "secondYellowCards": second_yellow_card,
+                "redCards": red_card,
+                "goalsConceded": conceded,
+                "cleanSheets": clean,
+                "minutesPlayed": minute_played,
+            }
+            for season, competition_id, competition_name, club_id, club_name, appearance, goal, yellow_card, second_yellow_card, red_card, conceded, clean, minute_played in zip(  # noqa: E501
+                seasons,
+                competitions_ids,
+                competitions_names,
+                clubs_ids,
+                clubs_names,
+                appearances,
+                goals,
+                yellow_cards,
+                second_yellow_cards,
+                red_cards,
+                goals_conceded,
+                clean_sheets,
+                minutes_played,
+            )
+        ]
+
     def _request_player_page(self) -> None:
         player_url = f"https://www.transfermarkt.com/-/leistungsdatendetails/spieler/{self.player_id}"
         self.page = request_url_page(url=player_url)
@@ -84,3 +155,7 @@ def _request_player_page(self) -> None:
     def _check_player_found(self) -> None:
         if not get_text_by_xpath(self, Players.Profile.URL):
             raise HTTPException(status_code=404, detail=f"Player Stats not found for id: {self.player_id}")
+
+    def _check_player_goalkeeper(self) -> None:
+        """Set self.goalkeeper to True when the page's position text is exactly "Goalkeeper"."""
+        self.goalkeeper = get_text_by_xpath(self, Players.Stats.GOALKEEPER) == "Goalkeeper"
diff --git a/app/utils/xpath.py b/app/utils/xpath.py
index 3f3224b..0411ce0 100644
--- a/app/utils/xpath.py
+++ b/app/utils/xpath.py
@@ -84,6 +84,7 @@ class Stats:
         CLUBS_IDS: str = ".//a/@href"
         CLUBS_NAMES: str = ".//a//@title"
         APPEARANCES: str = ".//a//text()"
+        GOALKEEPER: str = "//li[contains(text(), 'Position:')]//span//text()"
 
 
 class Clubs: