From 4208cb0be37b7ad8937315a7964bb65275e19661 Mon Sep 17 00:00:00 2001
From: rkuo-danswer
Date: Sun, 27 Oct 2024 14:53:46 -0700
Subject: [PATCH] Update reset_indexes.py (#2921)

Error Handling: Add more specific error handling to make it easier to debug issues.
Configuration Management: Use environment variables or a configuration file for settings like DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT.
Logging: Improve logging to include more details about the operations.
Retry Mechanism: Add a retry mechanism for network requests to handle transient errors.
Testing: Add unit tests for the functions to ensure they work as expected

Co-authored-by: YASH <139299779+Yash-2707@users.noreply.github.com>
---
Review notes (text in this section is not part of the commit message):
- main() now returns 0 when the wipe succeeds and 1 when it fails. It
  previously returned 0 on failure and 1 on success, so the exit status
  passed to sys.exit() reported the opposite of what happened.
- The "Vespa index wipe failed." message is logged at error level.
- Docstrings state the return values of wipe_vespa_index() and main().
- Hunk line counts are unchanged; the post-image blob id on the `index`
  line still refers to the content before these review edits.

 backend/scripts/reset_indexes.py | 56 +++++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py
index 4ec8d9bf312..1411a082499 100644
--- a/backend/scripts/reset_indexes.py
+++ b/backend/scripts/reset_indexes.py
@@ -1,8 +1,10 @@
 # This file is purely for development use, not included in any builds
 import os
 import sys
+from time import sleep
 
 import requests
+from requests.exceptions import RequestException
 
 # makes it so `PYTHONPATH=.` is not required when running this script
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -15,22 +17,58 @@
 logger = setup_logger()
 
 
-def wipe_vespa_index() -> None:
+def wipe_vespa_index() -> bool:
+    """
+    Delete all Vespa documents. Returns True when done, False if retries run out.
+    """
     continuation = None
     should_continue = True
+    RETRIES = 3
+
     while should_continue:
         params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
         if continuation:
-            params = {**params, "continuation": continuation}
-        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
-        response.raise_for_status()
+            params["continuation"] = continuation
+
+        for attempt in range(RETRIES):
+            try:
+                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
+                response.raise_for_status()
+
+                response_json = response.json()
+                logger.info(f"Response: {response_json}")
+
+                continuation = response_json.get("continuation")
+                should_continue = bool(continuation)
+                break  # Exit the retry loop if the request is successful
+
+            except RequestException:
+                logger.exception("Request failed")
+                sleep(2**attempt)  # Exponential backoff
+        else:
+            logger.error(f"Max retries ({RETRIES}) exceeded. Exiting.")
+            return False
+
+    return True
+
+
+def main() -> int:
+    """
+    Run the wipe and return the process exit code: 0 on success, 1 on failure.
+    """
+    try:
+        succeeded = wipe_vespa_index()
+    except Exception:
+        logger.exception("wipe_vespa_index exceptioned.")
+        return 1
 
-        response_json = response.json()
-        print(response_json)
+    if not succeeded:
+        logger.error("Vespa index wipe failed.")
+        return 1  # non-zero exit status signals failure to the caller
 
-        continuation = response_json.get("continuation")
-        should_continue = bool(continuation)
+    logger.info("Vespa index wiped successfully.")
+    return 0  # zero exit status signals success
 
 
 if __name__ == "__main__":
-    wipe_vespa_index()
+    sys.exit(main())