From 43574a531573644f5cc5c64e4fb4f4e87c67296a Mon Sep 17 00:00:00 2001
From: Sebastian Niehus
Date: Mon, 12 Aug 2024 14:33:22 +0200
Subject: [PATCH] feat: Use urllib.parse.urljoin where appropriate

TASK: PHS-636
---
 .../connectors/argilla/default_client.py     | 41 ++++++++------
 .../document_index/document_index.py         | 56 ++++++++++++++-----
 src/intelligence_layer/core/tracer/tracer.py |  3 +-
 tests/core/tracer/test_in_memory_tracer.py   |  2 +-
 4 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/src/intelligence_layer/connectors/argilla/default_client.py b/src/intelligence_layer/connectors/argilla/default_client.py
index 6f70168e8..0b71aa085 100644
--- a/src/intelligence_layer/connectors/argilla/default_client.py
+++ b/src/intelligence_layer/connectors/argilla/default_client.py
@@ -10,6 +10,7 @@
     TypeVar,
     cast,
 )
+from urllib.parse import urljoin
 from uuid import uuid4
 
 from pydantic import BaseModel, computed_field
@@ -259,7 +260,9 @@ def split_dataset(self, dataset_id: str, n_splits: int) -> None:
 
     def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         response = self.session.get(
-            f"{self.api_url}api/v1/me/datasets/{dataset_id}/metadata-properties"
+            urljoin(
+                self.api_url, f"api/v1/me/datasets/{dataset_id}/metadata-properties"
+            )
         )
         response.raise_for_status()
         existing_split_id = [
@@ -269,7 +272,9 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         ]
         if len(existing_split_id) > 0:
             self.session.delete(
-                f"{self.api_url}api/v1/metadata-properties/{existing_split_id[0]}"
+                urljoin(
+                    self.api_url, f"api/v1/metadata-properties/{existing_split_id[0]}"
+                )
             )
 
         data = {
@@ -281,7 +286,7 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         }
 
         response = self.session.post(
-            f"{self.api_url}api/v1/datasets/{dataset_id}/metadata-properties",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/metadata-properties"),
             json=data,
         )
         response.raise_for_status()
@@ -314,14 +319,14 @@ def chunks(
             ]
         }
         response = self.session.patch(
-            f"{self.api_url}api/v1/datasets/{dataset_id}/records",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
             json=data,
         )
         response.raise_for_status()
 
     def _evaluations(self, dataset_id: str) -> Mapping[str, Any]:
         response = self.session.get(
-            self.api_url + f"api/v1/datasets/{dataset_id}/records",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
             params={"response_status": "submitted", "include": "responses"},
         )
         response.raise_for_status()
@@ -340,7 +345,7 @@ def records(self, dataset_id: str) -> Iterable[Record]:
 
     def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
         response = self.session.post(
-            self.api_url + f"api/v1/records/{evaluation.record_id}/responses",
+            urljoin(self.api_url, f"api/v1/records/{evaluation.record_id}/responses"),
             json={
                 "status": "submitted",
                 "values": {
@@ -352,13 +357,13 @@ def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
         response.raise_for_status()
 
     def _list_workspaces(self) -> Mapping[str, Any]:
-        url = self.api_url + "api/v1/me/workspaces"
+        url = urljoin(self.api_url, "api/v1/me/workspaces")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _create_workspace(self, workspace_name: str) -> Mapping[str, Any]:
-        url = self.api_url + "api/workspaces"
+        url = urljoin(self.api_url, "api/workspaces")
         data = {
             "name": workspace_name,
         }
         response = self.session.post(url, json=data)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _list_datasets(self, workspace_id: str) -> Mapping[str, Any]:
-        url = self.api_url + f"api/v1/me/datasets?workspace_id={workspace_id}"
+        url = urljoin(self.api_url, f"api/v1/me/datasets?workspace_id={workspace_id}")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _publish_dataset(self, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/publish"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/publish")
         response = self.session.put(url)
         response.raise_for_status()
 
     def _create_dataset(
         self, name: str, workspace_id: str, guidelines: str = "No guidelines."
     ) -> Mapping[str, Any]:
-        url = self.api_url + "api/v1/datasets"
+        url = urljoin(self.api_url, "api/v1/datasets")
         data = {
             "name": name,
             "guidelines": guidelines,
@@ -392,13 +397,13 @@ def _create_dataset(
         return cast(Mapping[str, Any], response.json())
 
     def _list_fields(self, dataset_id: str) -> Sequence[Any]:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Sequence[Any], response.json())
 
     def _create_field(self, name: str, title: str, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
         data = {
             "name": name,
             "title": title,
@@ -416,7 +421,7 @@ def _create_question(
         settings: Mapping[str, Any],
         dataset_id: str,
     ) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/questions"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/questions")
         data = {
             "name": name,
             "title": title,
@@ -433,7 +438,7 @@ def _list_records(
         optional_params: Optional[Mapping[str, str]] = None,
         page_size: int = 50,
     ) -> Iterable[Mapping[str, Any]]:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
         for offset in count(0, page_size):
             params: Mapping[str, str | int] = {
                 **(optional_params or {}),
@@ -457,7 +462,7 @@ def _create_records(
         records: Sequence[RecordData],
         dataset_id: str,
     ) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
         for batch in batch_iterator(records, 200):
             data = {
                 "items": [
@@ -480,11 +485,11 @@ def delete_workspace(self, workspace_id: str) -> None:
         self._delete_workspace(workspace_id)
 
     def _delete_workspace(self, workspace_id: str) -> None:
-        url = self.api_url + f"api/v1/workspaces/{workspace_id}"
+        url = urljoin(self.api_url, f"api/v1/workspaces/{workspace_id}")
         response = self.session.delete(url)
         response.raise_for_status()
 
     def _delete_dataset(self, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}")
         response = self.session.delete(url)
         response.raise_for_status()
diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py
index cb6537035..dfb3cbc05 100644
--- a/src/intelligence_layer/connectors/document_index/document_index.py
+++ b/src/intelligence_layer/connectors/document_index/document_index.py
@@ -5,7 +5,7 @@
 from http import HTTPStatus
 from json import dumps
 from typing import Annotated, Any, Literal, Optional, Union
-from urllib.parse import quote
+from urllib.parse import quote, urljoin
 
 import requests
 from pydantic import BaseModel, Field, field_validator, model_validator
@@ -434,7 +434,7 @@ def list_namespaces(self) -> Sequence[str]:
         Returns:
             List of all available namespaces.
         """
-        url = f"{self._base_document_index_url}/namespaces"
+        url = urljoin(self._base_document_index_url, "namespaces")
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         return [str(namespace) for namespace in response.json()]
@@ -448,7 +448,10 @@ def create_collection(self, collection_path: CollectionPath) -> None:
         Args:
             collection_path: Path to the collection of interest.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
+        url_suffix = (
+            f"collections/{collection_path.namespace}/{collection_path.collection}"
+        )
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.put(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -458,7 +461,10 @@ def delete_collection(self, collection_path: CollectionPath) -> None:
         Args:
             collection_path: Path to the collection of interest.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
+        url_suffix = (
+            f"collections/{collection_path.namespace}/{collection_path.collection}"
+        )
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.delete(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -472,7 +478,8 @@ def list_collections(self, namespace: str) -> Sequence[CollectionPath]:
         Returns:
             List of all `CollectionPath` instances in the given namespace.
         """
-        url = f"{self._base_document_index_url}/collections/{namespace}"
+        url_suffix = f"collections/{namespace}"
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         return [
@@ -489,7 +496,8 @@ def create_index(
             index_path: Path to the index.
             index_configuration: Configuration of the index to be created.
         """
-        url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
+        url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
+        url = urljoin(self._base_document_index_url, url_suffix)
 
         data = {
             "chunk_size": index_configuration.chunk_size,
@@ -537,7 +545,9 @@ def index_configuration(self, index_path: IndexPath) -> IndexConfiguration:
         Returns:
             Configuration of the index.
         """
-        url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
+        url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
+        url = urljoin(self._base_document_index_url, url_suffix)
+
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         response_json: Mapping[str, Any] = response.json()
@@ -556,7 +566,9 @@ def assign_index_to_collection(
             collection_path: Path to the collection of interest.
             index_name: Name of the index.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
+        url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
+        url = urljoin(self._base_document_index_url, url_suffix)
+
         response = requests.put(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -597,7 +609,9 @@ def delete_index_from_collection(
             index_name: Name of the index.
             collection_path: Path to the collection of interest.
""" - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.delete(url, headers=self.headers) self._raise_for_status(response) @@ -625,7 +639,9 @@ def list_assigned_index_names( Returns: List of all indexes that are assigned to the collection. """ - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.get(url, headers=self.headers) self._raise_for_status(response) return [str(index_name) for index_name in response.json()] @@ -676,7 +692,9 @@ def add_document( contents: Actual content of the document. Currently only supports text. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.put( url, data=dumps(contents._to_modalities_json()), headers=self.headers ) @@ -688,7 +706,9 @@ def delete_document(self, document_path: DocumentPath) -> None: Args: document_path: Consists of `collection_path` and name of document to be deleted. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.delete(url, headers=self.headers) self._raise_for_status(response) @@ -701,7 +721,9 @@ def document(self, document_path: DocumentPath) -> DocumentContents: Returns: Content of the retrieved document. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.get(url, headers=self.headers) self._raise_for_status(response) return DocumentContents._from_modalities_json(response.json()) @@ -728,7 +750,10 @@ def documents( max_documents=None, starts_with=None ) - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/docs" + url_suffix = ( + f"collections/{collection_path.namespace}/{collection_path.collection}/docs" + ) + url = urljoin(self._base_document_index_url, url_suffix) query_params = {} if filter_query_params.max_documents: @@ -756,7 +781,8 @@ def search( Returns: Result of the search operation. Will be empty if nothing was retrieved. 
""" - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search" + url = urljoin(self._base_document_index_url, url_suffix) filters: list[dict[str, Any]] = [{"with": [{"modality": "text"}]}] if search_query.filters: diff --git a/src/intelligence_layer/core/tracer/tracer.py b/src/intelligence_layer/core/tracer/tracer.py index 0beb67c19..6bb8dca0d 100644 --- a/src/intelligence_layer/core/tracer/tracer.py +++ b/src/intelligence_layer/core/tracer/tracer.py @@ -8,6 +8,7 @@ from enum import Enum from types import TracebackType from typing import TYPE_CHECKING, Literal, Optional, Union +from urllib.parse import urljoin from uuid import UUID, uuid4 import requests @@ -112,7 +113,7 @@ def submit_to_trace_viewer(exported_spans: Sequence[ExportedSpan]) -> bool: """ trace_viewer_url = os.getenv("TRACE_VIEWER_URL", "http://localhost:3000") - trace_viewer_trace_upload = f"{trace_viewer_url}/trace" + trace_viewer_trace_upload = urljoin(trace_viewer_url, "/trace") try: res = requests.post( trace_viewer_trace_upload, diff --git a/tests/core/tracer/test_in_memory_tracer.py b/tests/core/tracer/test_in_memory_tracer.py index fa41c1487..ffb4c81a5 100644 --- a/tests/core/tracer/test_in_memory_tracer.py +++ b/tests/core/tracer/test_in_memory_tracer.py @@ -185,7 +185,7 @@ def set_env(name: str, value: str | None) -> Iterator[None]: os.environ.update(old_environ) -def test_in_memory_tracer_trace_viewer_doesnt_crash_if_it_cant_reach_document_index() -> ( +def test_in_memory_tracer_submit_to_trace_viewer_doesnt_crash_if_it_cant_reach_the_trace_viewer() -> ( None ): # note that this test sets the environment variable, which might