Docstrings improvement (#43)
* Setup Interrogate check

* Create docstrings
felipeall committed Nov 1, 2023
1 parent 0f68120 commit b7cfc1c
Showing 17 changed files with 510 additions and 22 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/check.yml
@@ -1,4 +1,4 @@
name: Linter Check
name: Code Check
on:
push:
branches: [ main ]
@@ -19,3 +19,7 @@ jobs:
run: |
pip install black
black --check .
- name: interrogate
run: |
pip install interrogate
interrogate app/services -vv
6 changes: 3 additions & 3 deletions app/api/endpoints/clubs.py
@@ -10,21 +10,21 @@


@router.get("/search/{club_name}")
def search_clubs(club_name: str, page_number: Optional[int] = 1):
def search_clubs(club_name: str, page_number: Optional[int] = 1) -> dict:
tfmkt = TransfermarktClubSearch(query=club_name, page_number=page_number)
found_clubs = tfmkt.search_clubs()
return found_clubs


@router.get("/{club_id}/profile")
def get_club_profile(club_id: str):
def get_club_profile(club_id: str) -> dict:
tfmkt = TransfermarktClubProfile(club_id=club_id)
club_profile = tfmkt.get_club_profile()
return club_profile


@router.get("/{club_id}/players")
def get_club_players(club_id: str, season_id: Optional[str] = None):
def get_club_players(club_id: str, season_id: Optional[str] = None) -> dict:
tfmkt = TransfermarktClubPlayers(club_id=club_id, season_id=season_id)
club_players = tfmkt.get_club_players()
return club_players
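
The new -> dict annotations make each endpoint's response shape explicit. As a minimal sketch of exercising these routes with FastAPI's TestClient — assuming the application object is exposed at app.main:app and the router is mounted under a /clubs prefix, neither of which appears in this diff:

from fastapi.testclient import TestClient

from app.main import app  # assumed entry point; not part of this commit

client = TestClient(app)

# The "/clubs" prefix is an assumption about how this router is included.
# Note that this triggers a live request to Transfermarkt via the service layer.
resp = client.get("/clubs/search/Barcelona", params={"page_number": 1})
assert resp.status_code == 200
print(resp.json())  # dict produced by TransfermarktClubSearch.search_clubs()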
4 changes: 2 additions & 2 deletions app/api/endpoints/competitions.py
@@ -9,14 +9,14 @@


@router.get("/search/{competition_name}")
def search_competitions(competition_name: str, page_number: Optional[int] = 1):
def search_competitions(competition_name: str, page_number: Optional[int] = 1) -> dict:
tfmkt = TransfermarktCompetitionSearch(query=competition_name, page_number=page_number)
competitions = tfmkt.search_competitions()
return competitions


@router.get("/{competition_id}/clubs")
def get_competition_clubs(competition_id: str, season_id: Optional[str] = None):
def get_competition_clubs(competition_id: str, season_id: Optional[str] = None) -> dict:
tfmkt = TransfermarktCompetitionClubs(competition_id=competition_id, season_id=season_id)
competition_clubs = tfmkt.get_competition_clubs()
return competition_clubs
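
The competitions endpoints delegate to their service classes in the same way, with season_id optional on the clubs route. A short sketch of calling the service layer directly, assuming the import path mirrors the app/services/clubs/* modules shown below (the competitions service modules are not part of this excerpt) and that the response keys mirror the clubs search response:

from app.services.competitions.search import TransfermarktCompetitionSearch  # assumed path

tfmkt = TransfermarktCompetitionSearch(query="bundesliga", page_number=1)
results = tfmkt.search_competitions()
print(results["query"], results["pageNumber"])  # keys assumed to mirror search_clubs()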
103 changes: 103 additions & 0 deletions app/services/base.py
@@ -14,11 +14,35 @@

@dataclass
class TransfermarktBase:
"""
Base class for making HTTP requests to Transfermarkt and extracting data from the web pages.
Args:
URL (str): The URL for the web page to be fetched.
Attributes:
page (ElementTree): The parsed web page content.
response (dict): A dictionary to store the response data.
"""

URL: str
page: ElementTree = field(default_factory=lambda: None, init=False)
response: dict = field(default_factory=lambda: {}, init=False)

def make_request(self, url: Optional[str] = None) -> Response:
"""
Make an HTTP GET request to the specified URL.
Args:
url (str, optional): The URL to make the request to. If not provided, the class's URL
attribute will be used.
Returns:
Response: An HTTP Response object containing the server's response to the request.
Raises:
HTTPException: If there are too many redirects, or if the server returns a client or
server error status code.
"""
url = self.URL if not url else url
try:
response: Response = requests.get(
@@ -47,22 +71,73 @@ def make_request(self, url: Optional[str] = None) -> Response:
return response

def request_url_bsoup(self) -> BeautifulSoup:
"""
Fetch the web page content and parse it using BeautifulSoup.
Returns:
BeautifulSoup: A BeautifulSoup object representing the parsed web page content.
Raises:
HTTPException: If there are too many redirects, or if the server returns a client or
server error status code.
"""
response: Response = self.make_request()
return BeautifulSoup(markup=response.content, features="html.parser")

@staticmethod
def convert_bsoup_to_page(bsoup: BeautifulSoup) -> ElementTree:
"""
Convert a BeautifulSoup object to an ElementTree.
Args:
bsoup (BeautifulSoup): The BeautifulSoup object representing the parsed web page content.
Returns:
ElementTree: An ElementTree representing the parsed web page content for further processing.
"""
return etree.HTML(str(bsoup))

def request_url_page(self) -> ElementTree:
"""
Fetch the web page content, parse it using BeautifulSoup, and convert it to an ElementTree.
Returns:
ElementTree: An ElementTree representing the parsed web page content for further
processing.
Raises:
HTTPException: If there are too many redirects, or if the server returns a client or
server error status code.
"""
bsoup: BeautifulSoup = self.request_url_bsoup()
return self.convert_bsoup_to_page(bsoup=bsoup)

def raise_exception_if_not_found(self, xpath: str):
"""
Raise an exception if the specified XPath does not yield any results on the web page.
Args:
xpath (str): The XPath expression to query elements on the page.
Raises:
HTTPException: If the specified XPath query does not yield any results, indicating an invalid request.
"""
if not self.get_text_by_xpath(xpath):
raise HTTPException(status_code=404, detail=f"Invalid request (url: {self.URL})")

def get_list_by_xpath(self, xpath: str, remove_empty: Optional[bool] = True) -> Optional[list]:
"""
Extract a list of elements from the web page using the specified XPath expression.
Args:
xpath (str): The XPath expression to query elements on the page.
remove_empty (bool, optional): If True, remove empty or whitespace-only elements from
the list. Default is True.
Returns:
Optional[list]: A list of elements extracted from the web page based on the XPath query.
If remove_empty is True, empty or whitespace-only elements are filtered out.
"""
elements: list = self.page.xpath(xpath)
if remove_empty:
elements_valid: list = [trim(e) for e in elements if trim(e)]
@@ -79,6 +154,25 @@ def get_text_by_xpath(
iloc_to: Optional[int] = None,
join_str: Optional[str] = None,
) -> Optional[str]:
"""
Extract text content from the web page using the specified XPath expression.
Args:
xpath (str): The XPath expression to query elements on the page.
pos (int, optional): Index of the element to extract if multiple elements match the
XPath. Default is 0.
iloc (int, optional): Extract a single element by index, used as an alternative to 'pos'.
iloc_from (int, optional): Extract a range of elements starting from the specified
index (inclusive).
iloc_to (int, optional): Extract a range of elements up to the specified
index (exclusive).
join_str (str, optional): If provided, join multiple text elements into a single string
using this separator.
Returns:
Optional[str]: The extracted text content from the web page based on the XPath query and
optional parameters. If no matching element is found, None is returned.
"""
element = self.page.xpath(xpath)

if not element:
@@ -108,6 +202,15 @@ def get_text_by_xpath(
return None

def get_search_last_page_number(self, xpath_base: str) -> int:
"""
Retrieve the last page number for search results based on the provided base XPath.
Args:
xpath_base (str): The base XPath for extracting page number information.
Returns:
int: The last page number for search results. Returns 1 if no page numbers are found.
"""
url_page_number_last = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_LAST)
url_page_number_active = self.get_text_by_xpath(xpath_base + Commons.Search.PAGE_NUMBER_ACTIVE)

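Taken together, the newly documented helpers describe a small pipeline that every service reuses: fetch the page, parse it, then query it with XPath. A minimal sketch of that flow on TransfermarktBase itself, with an illustrative club URL and generic XPath expressions (the service subclasses normally do the first two steps in __post_init__):

from app.services.base import TransfermarktBase

tfmkt = TransfermarktBase(URL="https://www.transfermarkt.us/-/datenfakten/verein/131")  # illustrative club id
tfmkt.page = tfmkt.request_url_page()                     # GET + BeautifulSoup + ElementTree
tfmkt.raise_exception_if_not_found(xpath="//h1//text()")  # 404 if nothing matches
name = tfmkt.get_text_by_xpath("//h1//text()")            # first non-empty text node, or None
links = tfmkt.get_list_by_xpath("//a/@href")              # list with empty strings filtered out
print(name, len(links))
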
31 changes: 28 additions & 3 deletions app/services/clubs/players.py
@@ -9,25 +9,43 @@

@dataclass
class TransfermarktClubPlayers(TransfermarktBase):
"""
A class for retrieving and parsing the players of a football club from Transfermarkt.
Args:
club_id (str): The unique identifier of the football club.
season_id (str): The unique identifier of the season.
URL (str): The URL template for the club's players page on Transfermarkt.
"""

club_id: str = None
season_id: str = None
URL: str = "https://www.transfermarkt.com/-/kader/verein/{club_id}/saison_id/{season_id}/plus/1"

def __post_init__(self):
def __post_init__(self) -> None:
"""Initialize the TransfermarktClubPlayers class."""
self.URL = self.URL.format(club_id=self.club_id, season_id=self.season_id)
self.page = self.request_url_page()
self.raise_exception_if_not_found(xpath=Clubs.Players.CLUB_NAME)
self.__update_season_id()
self.__update_past_flag()

def __update_season_id(self):
"""Update the season ID if it's not provided by extracting it from the website."""
if self.season_id is None:
self.season_id = extract_from_url(self.get_text_by_xpath(Clubs.Players.CLUB_URL), "season_id")

def __update_past_flag(self):
def __update_past_flag(self) -> None:
"""Check if the season is the current or if it's a past one and update the flag accordingly."""
self.past = "Current club" in self.get_list_by_xpath(Clubs.Players.PAST_FLAG)

def __parse_club_players(self) -> list:
def __parse_club_players(self) -> list[dict]:
"""
Parse player information from the webpage and return a list of dictionaries, each representing a player.
Returns:
list[dict]: A list of player information dictionaries.
"""
page_nationalities = self.page.xpath(Clubs.Players.PAGE_NATIONALITIES)
page_players_infos = self.page.xpath(Clubs.Players.PAGE_INFOS)
page_players_signed_from = self.page.xpath(
@@ -102,6 +120,13 @@ def __parse_club_players(self) -> list:
]

def get_club_players(self) -> dict:
"""
Retrieve and parse player information for the specified football club.
Returns:
dict: A dictionary containing the club's unique identifier, player information, and the timestamp of when
the data was last updated.
"""
self.response["id"] = self.club_id
self.response["players"] = self.__parse_club_players()
self.response["updatedAt"] = datetime.now()
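A brief usage sketch for the players service with an illustrative club id; the keys printed are the ones documented in get_club_players above:

from app.services.clubs.players import TransfermarktClubPlayers

# season_id is optional; when omitted, __update_season_id resolves it from the club page.
tfmkt = TransfermarktClubPlayers(club_id="131")  # illustrative club id
data = tfmkt.get_club_players()
print(data["id"], data["updatedAt"], len(data["players"]))
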
22 changes: 20 additions & 2 deletions app/services/clubs/profile.py
@@ -9,15 +9,33 @@

@dataclass
class TransfermarktClubProfile(TransfermarktBase):
"""
A class for retrieving and parsing the profile information of a football club from Transfermarkt.
Args:
club_id (str): The unique identifier of the football club.
URL (str): The URL template for the club's profile page on Transfermarkt.
"""

club_id: str = None
URL: str = "https://www.transfermarkt.us/-/datenfakten/verein/{club_id}"

def __post_init__(self):
def __post_init__(self) -> None:
"""Initialize the TransfermarktClubProfile class."""
self.URL = self.URL.format(club_id=self.club_id)
self.page = self.request_url_page()
self.raise_exception_if_not_found(xpath=Clubs.Profile.URL)

def get_club_profile(self):
def get_club_profile(self) -> dict:
"""
Retrieve and parse the profile information of the football club from Transfermarkt.
This method extracts various attributes of the club's profile, such as name, official name, address, contact
information, stadium details, and more.
Returns:
dict: A dictionary containing the club's profile information.
"""
self.response["id"] = self.club_id
self.response["url"] = self.get_text_by_xpath(Clubs.Profile.URL)
self.response["name"] = self.get_text_by_xpath(Clubs.Profile.NAME)
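The profile service follows the same pattern; a short sketch with an illustrative club id, printing the keys set above:

from app.services.clubs.profile import TransfermarktClubProfile

profile = TransfermarktClubProfile(club_id="131").get_club_profile()  # illustrative club id
print(profile["id"], profile["name"], profile["url"])
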
27 changes: 26 additions & 1 deletion app/services/clubs/search.py
@@ -8,17 +8,35 @@

@dataclass
class TransfermarktClubSearch(TransfermarktBase):
"""
A class for searching football clubs on Transfermarkt and retrieving search results.
Args:
query (str): The search query for finding football clubs.
URL (str): The URL template for the search query.
page_number (int): The page number of search results (default is 1).
"""

query: str = None
URL: str = (
"https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query={query}&Verein_page={page_number}"
)
page_number: int = 1

def __post_init__(self):
def __post_init__(self) -> None:
"""Initialize the TransfermarktClubSearch class."""
self.URL = self.URL.format(query=self.query, page_number=self.page_number)
self.page = self.request_url_page()

def __parse_search_results(self) -> list:
"""
Parse the search results page and extract information about the found football clubs.
Returns:
list: A list of dictionaries, where each dictionary contains information about a
football club found in the search results, including the club's unique identifier,
URL, name, country, squad size, and market value.
"""
clubs_names = self.get_list_by_xpath(Clubs.Search.NAMES)
clubs_urls = self.get_list_by_xpath(Clubs.Search.URLS)
clubs_countries = self.get_list_by_xpath(Clubs.Search.COUNTRIES)
@@ -46,6 +64,13 @@ def __parse_search_results(self) -> list:
]

def search_clubs(self) -> dict:
"""
Perform a search for football clubs on Transfermarkt and retrieve search results.
Returns:
dict: A dictionary containing the search query, current page number, last page number,
search results, and the timestamp of when the search was conducted.
"""
self.response["query"] = self.query
self.response["pageNumber"] = self.page_number
self.response["lastPageNumber"] = self.get_search_last_page_number(Clubs.Search.BASE)
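The search service adds pagination on top of the shared base: lastPageNumber comes from get_search_last_page_number, so a caller can walk every result page. A minimal sketch:

from app.services.clubs.search import TransfermarktClubSearch

first = TransfermarktClubSearch(query="real", page_number=1).search_clubs()
print(first["query"], first["pageNumber"], first["lastPageNumber"])

# Each additional page is a separate request to Transfermarkt.
for page in range(2, first["lastPageNumber"] + 1):
    TransfermarktClubSearch(query="real", page_number=page).search_clubs()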