Skip to content

Commit

Permalink
update: more documentation
Browse files: browse the repository at this point in the history
  • Loading branch information
gitronald committed May 6, 2024
1 parent cc5517e commit 5568d44
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 11 deletions.
2 changes: 1 addition & 1 deletion WebSearcher/component_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
}


def classify_type(cmpt: bs4.element.Tag):
def classify_type(cmpt: bs4.element.Tag) -> str:
"""Component classifier
Args:
Expand Down
13 changes: 7 additions & 6 deletions WebSearcher/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ def download_locations(
"""
os.makedirs(data_dir, exist_ok=True)

full_url = get_latest_url(url)
fp = os.path.join(data_dir, full_url.split('/')[-1])
url_latest = get_latest_url(url)
fp = os.path.join(data_dir, url_latest.split('/')[-1])
fp_unzip = fp.replace('.zip', '')

# Check if the current version already exists
Expand All @@ -63,10 +63,11 @@ def download_locations(
elif os.path.exists(fp_unzip):
print(f"Version up to date: {fp_unzip}")
else:
print(f"Version out of date")
# Download and save
try:
print(f'Getting: {full_url}')
response = requests.get(full_url)
print(f'getting: {url_latest}')
response = requests.get(url_latest)
except Exception:
log.exception('Failed to retrieve location data')

Expand All @@ -87,8 +88,8 @@ def get_latest_url(url:str):

# Get current CSV url and use as filename
geo_url = sorted(geo_urls)[-1]
full_url = 'https://developers.google.com' + geo_url
return full_url
url_latest = 'https://developers.google.com' + geo_url
return url_latest

except Exception:
log.exception("Failed to retrieve location data url")
Expand Down
9 changes: 5 additions & 4 deletions WebSearcher/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@
from bs4 import BeautifulSoup


def get_component_parser(cmpt_type, cmpt_funcs=type_functions):
def get_component_parser(cmpt_type:str, cmpt_funcs:dict=type_functions) -> callable:
"""Returns the parser for a given component type"""
try:
return cmpt_funcs[cmpt_type]
except KeyError as e:
return not_implemented


def not_implemented(cmpt):
def not_implemented(cmpt) -> list:
"""Placeholder function for component parsers that are not implemented"""
parsed = BaseResult(type=classify_type(cmpt), sub_rank=0).model_dump()
parsed['error'] = 'not implemented'
return [parsed]


def parse_component(cmpt, cmpt_type='', cmpt_rank=0):
def parse_component(cmpt, cmpt_type:str = '', cmpt_rank:int = 0) -> list:
"""Parse a SERP component
Args:
Expand Down Expand Up @@ -69,7 +69,8 @@ def parse_component(cmpt, cmpt_type='', cmpt_rank=0):
return parsed_cmpt


def parse_serp(serp, serp_id=None, crawl_id=None, verbose=False, make_soup=False):
def parse_serp(serp:BeautifulSoup, serp_id:str = None, crawl_id:str = None,
verbose:bool = False, make_soup:bool = False) -> list:
"""Parse a Search Engine Result Page (SERP)
Args:
Expand Down

0 comments on commit 5568d44

Please sign in to comment.