Skip to content

Commit

Permalink
chore(spiders): increase wait_exponential backoff for retries
Browse files: browse the repository at this point in the history
  • Loading branch information
sergerdn committed Dec 18, 2024
1 parent e6e69b3 commit 590a131
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions ozon_collector/spiders/OzonDataQuerySpider.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -194,11 +194,13 @@ async def execute_js_in_browser(self, page: Page, rendered_js: str) -> Any:
  self.logger.info("Executing JavaScript in the browser.")
  return await page.evaluate(rendered_js)

- # Apply retry logic with Tenacity
+ # Apply retry logic with Tenacity and exponential backoff
  @retry(
  retry=retry_if_exception_type(Exception),
- stop=stop_after_attempt(5), # Retry up to 5 times
- wait=wait_exponential(multiplier=1, min=4, max=10), # Exponential backoff
+ # Retry up to 10 times
+ stop=stop_after_attempt(10),
+ # Exponential backoff with max delay of 60 minutes
+ wait=wait_exponential(multiplier=1, min=60, max=60 * 60),
  before=before_log(logger, logging.DEBUG), # Log before each retry attempt
  after=after_log(logger, logging.DEBUG),
  reraise=True, # Reraise the exception if retries fail
Expand Down Expand Up @@ -287,9 +289,15 @@ def handle_console(msg: ConsoleMessage) -> None:
  url = "https://data.ozon.ru/app/search-queries?__%s" % quote_plus(query_keyword)
  # Change the URL in the browser's address bar without reloading
  await page.evaluate(f"window.history.pushState(null, '', '{url}')")
- await asyncio.sleep(5)
-
- items = await self._render_execute_and_get_items(page, query_keyword)
+ await asyncio.sleep(10)
+
+ while True:
+ try:
+ items = await self._render_execute_and_get_items(page, query_keyword)
+ except Exception as e:
+ self.logger.error(e)
+ breakpoint()
+ break

  for item in items:
  # Skip processing if the item’s query already matches the current query_keyword
Expand Down

0 comments on commit 590a131

Please sign in to comment.