Skip to content

Commit

Permalink
small fix for uuid
Browse files Browse the repository at this point in the history
  • Loading branch information
javier-cohere authored and javier-cohere committed Sep 12, 2024
1 parent 8405a0d commit 16ec7ef
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
23 changes: 17 additions & 6 deletions compass_sdk/compass.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
DEFAULT_MAX_CHUNKS_PER_REQUEST,
DEFAULT_MAX_ERROR_RATE,
DEFAULT_MAX_RETRIES,
DEFAULT_SLEEP_RETRY_SECONDS,
DEFAULT_SLEEP_RETRY_SECONDS, UUID_NAMESPACE,
)
from compass_sdk.utils import open_document

Expand Down Expand Up @@ -281,22 +281,28 @@ def parse_and_insert_doc(
*,
index_name: str,
filename: str,
file_id: Optional[str] = None,
file_uuid: Optional[str] = None,
max_retries: int = DEFAULT_MAX_RETRIES,
sleep_retry_seconds: int = DEFAULT_SLEEP_RETRY_SECONDS,
) -> Optional[str]:
"""
Parse and insert a document into an index in Compass
:param index_name: the name of the index
:param filename: the filename of the document
:param file_id: the id of the document (optional)
:param file_uuid: the id of the document (optional)
:param max_retries: the maximum number of times to retry a request if it fails
:param sleep_retry_seconds: the number of seconds to wait before retrying an API request
:return: an error message if the request failed, otherwise None
"""
def is_valid_uuid(fileid: Optional[str]) -> bool:
    """
    Check whether a value can be parsed as a UUID
    :param fileid: the candidate UUID string; None and non-string values are treated as invalid
    :return: True if uuid.UUID accepts the value, otherwise False
    """
    if fileid is None:
        # uuid.UUID(None) raises TypeError rather than ValueError; report it as invalid instead.
        return False
    try:
        uuid.UUID(fileid)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed hex string. TypeError/AttributeError: non-string input.
        return False
    return True

def generate_uuid(b64_string: str) -> uuid.UUID:
    """
    Derive a deterministic version-5 UUID from a string
    :param b64_string: the string used as the UUID name (the caller passes the base64-encoded file bytes)
    :return: uuid.uuid5 of b64_string under the UUID_NAMESPACE namespace
    """
    # Use the shared constant as the only namespace source instead of repeating the literal here.
    namespace = uuid.UUID(UUID_NAMESPACE)
    return uuid.uuid5(namespace, b64_string)

doc = open_document(filename)
Expand All @@ -313,10 +319,15 @@ def generate_uuid(b64_string: str) -> uuid.UUID:

# Open the file and read the bytes. Get some metadata and send it to Compass
b64 = base64.b64encode(doc.filebytes).decode("utf-8")
file_id = file_id or generate_uuid(b64)
if file_uuid and not is_valid_uuid(file_uuid):
err = f"Invalid UUID: {file_uuid}. Namespace: {UUID_NAMESPACE}"
logger.error(err)
return err
file_uuid = file_uuid or generate_uuid(b64)

Check failure on line 326 in compass_sdk/compass.py

View workflow job for this annotation

GitHub Actions / typecheck (3.11, .)

Type "str | UUID" is not assignable to declared type "str | None" (reportAssignmentType)
content_type = get_content_type(file=BytesIO(doc.filebytes), filepath=filename)

put_doc_input = ParseableDocument(
id=file_id,
id=file_uuid,

Check failure on line 330 in compass_sdk/compass.py

View workflow job for this annotation

GitHub Actions / typecheck (3.11, .)

Argument of type "str | None" cannot be assigned to parameter "id" of type "UUID" in function "__init__"   Type "str | None" is not assignable to type "UUID"     "str" is not assignable to "UUID" (reportArgumentType)
filename=filename,
bytes=b64,
content_type=content_type,
Expand Down
2 changes: 2 additions & 0 deletions compass_sdk/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@
"date",
"authors",
]

UUID_NAMESPACE = "00000000-0000-0000-0000-000000000000"

0 comments on commit 16ec7ef

Please sign in to comment.