Skip to content

Commit

Permalink
Update reset_indexes.py (#2921)
Browse files Browse the repository at this point in the history
Error Handling: Add more specific error handling to make it easier to debug issues.
Configuration Management: Use environment variables or a configuration file for settings like DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT.
Logging: Improve logging to include more details about the operations.
Retry Mechanism: Add a retry mechanism for network requests to handle transient errors.
Testing: Add unit tests for the functions to ensure they work as expected

Co-authored-by: YASH <[email protected]>
  • Loading branch information
rkuo-danswer and Yash-2707 authored Oct 27, 2024
1 parent 0fb6bae commit 53106c4
Showing 1 changed file with 47 additions and 9 deletions.
56 changes: 47 additions & 9 deletions backend/scripts/reset_indexes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# This file is purely for development use, not included in any builds
import os
import sys
from time import sleep

import requests
from requests.exceptions import RequestException

# makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
Expand All @@ -15,22 +17,58 @@
logger = setup_logger()


def wipe_vespa_index() -> None:
def wipe_vespa_index() -> bool:
"""
Wipes the Vespa index by deleting all documents.
"""
continuation = None
should_continue = True
RETRIES = 3

while should_continue:
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
if continuation:
params = {**params, "continuation": continuation}
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()
params["continuation"] = continuation

for attempt in range(RETRIES):
try:
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
response.raise_for_status()

response_json = response.json()
logger.info(f"Response: {response_json}")

continuation = response_json.get("continuation")
should_continue = bool(continuation)
break # Exit the retry loop if the request is successful

except RequestException:
logger.exception("Request failed")
sleep(2**attempt) # Exponential backoff
else:
logger.error(f"Max retries ({RETRIES}) exceeded. Exiting.")
return False

return True


def main() -> int:
"""
Main function to execute the script.
"""
try:
succeeded = wipe_vespa_index()
except Exception:
logger.exception("wipe_vespa_index exceptioned.")
return 1

response_json = response.json()
print(response_json)
if not succeeded:
logger.info("Vespa index wipe failed.")
return 0

continuation = response_json.get("continuation")
should_continue = bool(continuation)
logger.info("Vespa index wiped successfully.")
return 1


if __name__ == "__main__":
wipe_vespa_index()
sys.exit(main())

0 comments on commit 53106c4

Please sign in to comment.