Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TransfermarktPlayerInjuries service #49

Merged
merged 5 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions app/api/endpoints/players.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from fastapi import APIRouter

from app.services.players.injuries import TransfermarktPlayerInjuries
from app.services.players.market_value import TransfermarktPlayerMarketValue
from app.services.players.profile import TransfermarktPlayerProfile
from app.services.players.search import TransfermarktPlayerSearch
Expand Down Expand Up @@ -42,5 +43,13 @@ def get_player_transfers(player_id: str):
@router.get("/{player_id}/stats")
def get_player_stats(player_id: str):
tfmkt = TransfermarktPlayerStats(player_id=player_id)
player_market_value = tfmkt.get_player_stats()
return player_market_value
player_stats = tfmkt.get_player_stats()
return player_stats


# Define the endpoint for injuries
@router.get("/{player_id}/injuries")
def get_player_injuries(player_id: str, page_number: Optional[int] = 1):
tfmkt = TransfermarktPlayerInjuries(player_id=player_id, page_number=page_number)
players_injuries = tfmkt.get_player_injuries()
return players_injuries
18 changes: 7 additions & 11 deletions app/services/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from requests import Response, TooManyRedirects

from app.utils.utils import trim
from app.utils.xpath import Commons
from app.utils.xpath import Pagination


@dataclass
Expand Down Expand Up @@ -201,23 +201,19 @@ def get_text_by_xpath(
except IndexError:
return None

def get_search_last_page_number(self, xpath_base: str) -> int:
def get_last_page_number(self, xpath_base: str = "") -> int:
"""
Retrieve the last page number for search results based on the provided base XPath.
Retrieve the last page number for a paginated result based on the provided base XPath.

Args:
xpath_base (str): The base XPath for extracting page number information.

Returns:
int: The last page number for search results. Returns 1 if no page numbers are found.
"""
url_page_number_last = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_LAST)
url_page_number_active = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_ACTIVE)

if url_page_number_last:
return int(url_page_number_last.split("=")[-1])

if url_page_number_active:
return int(url_page_number_active.split("=")[-1])

for xpath in [Pagination.PAGE_NUMBER_LAST, Pagination.PAGE_NUMBER_ACTIVE]:
url_page = self.get_text_by_xpath(xpath_base + xpath)
if url_page:
return int(url_page.split("=")[-1].split("/")[-1])
return 1
2 changes: 1 addition & 1 deletion app/services/clubs/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def search_clubs(self) -> dict:
"""
self.response["query"] = self.query
self.response["pageNumber"] = self.page_number
self.response["lastPageNumber"] = self.get_search_last_page_number(Clubs.Search.BASE)
self.response["lastPageNumber"] = self.get_last_page_number(Clubs.Search.BASE)
self.response["results"] = self.__parse_search_results()
self.response["updatedAt"] = datetime.now()

Expand Down
2 changes: 1 addition & 1 deletion app/services/competitions/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def search_competitions(self) -> dict:
"""
self.response["query"] = self.query
self.response["pageNumber"] = self.page_number
self.response["lastPageNumber"] = self.get_search_last_page_number(Competitions.Search.BASE)
self.response["lastPageNumber"] = self.get_last_page_number(Competitions.Search.BASE)
self.response["results"] = self.__parse_search_results()
self.response["updatedAt"] = datetime.now()

Expand Down
57 changes: 57 additions & 0 deletions app/services/players/injuries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional
from xml.etree import ElementTree

from app.services.base import TransfermarktBase
from app.utils.utils import clean_response, extract_from_url, trim
from app.utils.xpath import Players


@dataclass
class TransfermarktPlayerInjuries(TransfermarktBase):
player_id: str = None
URL: str = "https://www.transfermarkt.com/player/verletzungen/spieler/{player_id}/plus/1/page/{page_number}"
page_number: int = 1

def __post_init__(self):
self.URL = self.URL.format(player_id=self.player_id, page_number=self.page_number)
self.page = self.request_url_page()
self.raise_exception_if_not_found(xpath=Players.Profile.URL)

def __parse_player_injuries(self) -> Optional[List[dict]]:
injuries: ElementTree = self.page.xpath(Players.Injuries.RESULTS)
player_injuries = []

for injury in injuries:
season = trim(injury.xpath(Players.Injuries.SEASONS))
injury_type = trim(injury.xpath(Players.Injuries.INJURY))
date_from = trim(injury.xpath(Players.Injuries.FROM))
date_until = trim(injury.xpath(Players.Injuries.UNTIL))
days = trim(injury.xpath(Players.Injuries.DAYS))
games_missed = trim(injury.xpath(Players.Injuries.GAMES_MISSED))
games_missed_clubs_urls = injury.xpath(Players.Injuries.GAMES_MISSED_CLUBS_URLS)
games_missed_clubs_ids = [extract_from_url(club_url) for club_url in games_missed_clubs_urls]

player_injuries.append(
{
"season": season,
"injury": injury_type,
"from": date_from,
"until": date_until,
"days": days,
"gamesMissed": games_missed,
"gamesMissedClubs": games_missed_clubs_ids,
},
)

return player_injuries

def get_player_injuries(self) -> dict:
self.response["id"] = self.player_id
self.response["pageNumber"] = self.page_number
self.response["lastPageNumber"] = self.get_last_page_number()
self.response["injuries"] = self.__parse_player_injuries()
self.response["updatedAt"] = datetime.now()

return clean_response(self.response)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TransfermarktPlayerInjuries class is well-structured and follows the inheritance pattern from TransfermarktBase. The use of utility functions and XPath expressions from other modules is consistent with the application's design. The methods __post_init__, __parse_player_injuries, and get_player_injuries are clearly defined and seem to follow the intended logic for fetching and parsing player injury data.

However, there are a few points to consider:

  1. The player_id attribute is initialized as None by default (line 13), which could lead to a malformed URL if not provided. It might be safer to require player_id as a mandatory parameter during initialization to avoid potential runtime errors.

  2. The __parse_player_injuries method is prefixed with double underscores, which in Python indicates a name mangling to create a private version of the variable or method. If this method is intended to be used only within this class, this is fine. However, if it is meant to be accessed from outside, consider renaming it with a single underscore to indicate it is a protected member of the class.

  3. The get_player_injuries method updates the self.response dictionary directly (lines 51-55). Ensure that self.response is defined in the base class or within this class to avoid an AttributeError.

  4. The datetime.now() method used in line 55 does not include timezone information. It's recommended to use datetime.now(timezone.utc) to have timezone-aware timestamps, which are generally more robust for applications that may run in different time zones or that store data that could be accessed from multiple time zones.

  5. The clean_response utility function is used to return the response (line 57). Ensure that this function is thoroughly tested, especially since it's handling the output of the service which could be critical for the application's functionality.

  6. Error handling is not visible in the provided hunk. It's important to ensure that there are proper error handling mechanisms in place, especially for network calls and XPath queries which can fail or return unexpected results.

  7. The use of XPath queries (lines 23-34) assumes that the structure of the HTML from transfermarkt.com will not change. If the website's layout changes, these queries may break. It's advisable to have a fallback mechanism or a monitoring system to alert when the data extraction process fails.

  8. The method get_last_page_number is called without being defined within this class (line 53). It's assumed to be a method from the base class TransfermarktBase. Ensure that this method exists and is correctly implemented in the base class.

  9. The extract_from_url utility function is used to extract club IDs from URLs (line 34). Ensure that this function is robust and can handle any edge cases or unexpected URL formats.

  10. The self.request_url_page() method is called in the __post_init__ method (line 19). Ensure that this method includes proper exception handling for HTTP errors or connection issues.

Overall, the class seems to be well-integrated with the rest of the application, and the methods are consistent with the PR's objectives. The above points are mainly for ensuring robustness and maintainability of the code.

6 changes: 1 addition & 5 deletions app/services/players/market_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,7 @@

from app.services.base import TransfermarktBase
from app.utils.regex import REGEX_CHART_CLUB_ID
from app.utils.utils import (
clean_response,
safe_regex,
zip_lists_into_dict,
)
from app.utils.utils import clean_response, safe_regex, zip_lists_into_dict
from app.utils.xpath import Players


Expand Down
2 changes: 1 addition & 1 deletion app/services/players/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def search_players(self) -> dict:
"""
self.response["query"] = self.query
self.response["pageNumber"] = self.page_number
self.response["lastPageNumber"] = self.get_search_last_page_number(Players.Search.BASE)
self.response["lastPageNumber"] = self.get_last_page_number(Players.Search.BASE)
self.response["results"] = self.__parse_search_results()
self.response["updatedAt"] = datetime.now()

Expand Down
7 changes: 1 addition & 6 deletions app/services/players/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,7 @@
from datetime import datetime

from app.services.base import TransfermarktBase
from app.utils.utils import (
clean_response,
extract_from_url,
to_camel_case,
zip_lists_into_dict,
)
from app.utils.utils import clean_response, extract_from_url, to_camel_case, zip_lists_into_dict
from app.utils.xpath import Players


Expand Down
6 changes: 1 addition & 5 deletions app/services/players/transfers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,7 @@
from datetime import datetime

from app.services.base import TransfermarktBase
from app.utils.utils import (
clean_response,
extract_from_url,
safe_split,
)
from app.utils.utils import clean_response, extract_from_url, safe_split
from app.utils.xpath import Players


Expand Down
17 changes: 13 additions & 4 deletions app/utils/xpath.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
class Players:
class Injuries:
RESULTS = "//div[@id='yw1']//tbody//tr"
SEASONS = ".//td[1]//text()"
INJURY = ".//td[2]//text()"
FROM = ".//td[3]//text()"
UNTIL = ".//td[4]//text()"
DAYS = ".//td[5]//text()"
GAMES_MISSED = ".//td[6]//span//text()"
GAMES_MISSED_CLUBS_URLS = ".//td[6]//a//@href"

class Profile:
ID = "//div[@data-action='profil']//@data-id"
URL = "//a[@class='tm-subnav-item megamenu']//@href"
Expand Down Expand Up @@ -160,7 +170,6 @@ class Clubs:
NAMES = "//td[@class='hauptlink no-border-links']//a//text()"


class Commons:
class Search:
PAGE_NUMBER_LAST = "//li[@class='tm-pagination__list-item tm-pagination__list-item--icon-last-page']//@href"
PAGE_NUMBER_ACTIVE = "//li[@class='tm-pagination__list-item tm-pagination__list-item--active']//@href"
class Pagination:
PAGE_NUMBER_LAST = "//li[contains(@class, 'list-item--icon-last-page')]//@href"
PAGE_NUMBER_ACTIVE = "//li[contains(@class, 'list-item--active')]//@href"
Loading