From 43574a531573644f5cc5c64e4fb4f4e87c67296a Mon Sep 17 00:00:00 2001
From: Sebastian Niehus
Date: Mon, 12 Aug 2024 14:33:22 +0200
Subject: [PATCH] feat: Use urllib.parse.urljoin where appropriate

TASK: PHS-636
---
 .../connectors/argilla/default_client.py     | 41 ++++++++------
 .../document_index/document_index.py         | 56 ++++++++++++++-----
 src/intelligence_layer/core/tracer/tracer.py |  3 +-
 tests/core/tracer/test_in_memory_tracer.py   |  2 +-
 4 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/src/intelligence_layer/connectors/argilla/default_client.py b/src/intelligence_layer/connectors/argilla/default_client.py
index 6f70168e8..0b71aa085 100644
--- a/src/intelligence_layer/connectors/argilla/default_client.py
+++ b/src/intelligence_layer/connectors/argilla/default_client.py
@@ -10,6 +10,7 @@
     TypeVar,
     cast,
 )
+from urllib.parse import urljoin
 from uuid import uuid4
 
 from pydantic import BaseModel, computed_field
@@ -259,7 +260,9 @@ def split_dataset(self, dataset_id: str, n_splits: int) -> None:
 
     def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         response = self.session.get(
-            f"{self.api_url}api/v1/me/datasets/{dataset_id}/metadata-properties"
+            urljoin(
+                self.api_url, f"api/v1/me/datasets/{dataset_id}/metadata-properties"
+            )
         )
         response.raise_for_status()
         existing_split_id = [
@@ -269,7 +272,9 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         ]
         if len(existing_split_id) > 0:
             self.session.delete(
-                f"{self.api_url}api/v1/metadata-properties/{existing_split_id[0]}"
+                urljoin(
+                    self.api_url, f"api/v1/metadata-properties/{existing_split_id[0]}"
+                )
             )
 
         data = {
@@ -281,7 +286,7 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
         }
 
         response = self.session.post(
-            f"{self.api_url}api/v1/datasets/{dataset_id}/metadata-properties",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/metadata-properties"),
             json=data,
         )
         response.raise_for_status()
@@ -314,14 +319,14 @@ def chunks(
             ]
         }
         response = self.session.patch(
-            f"{self.api_url}api/v1/datasets/{dataset_id}/records",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
             json=data,
         )
         response.raise_for_status()
 
     def _evaluations(self, dataset_id: str) -> Mapping[str, Any]:
         response = self.session.get(
-            self.api_url + f"api/v1/datasets/{dataset_id}/records",
+            urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
             params={"response_status": "submitted", "include": "responses"},
         )
         response.raise_for_status()
@@ -340,7 +345,7 @@ def records(self, dataset_id: str) -> Iterable[Record]:
 
     def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
         response = self.session.post(
-            self.api_url + f"api/v1/records/{evaluation.record_id}/responses",
+            urljoin(self.api_url, f"api/v1/records/{evaluation.record_id}/responses"),
             json={
                 "status": "submitted",
                 "values": {
@@ -352,13 +357,13 @@ def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
         response.raise_for_status()
 
     def _list_workspaces(self) -> Mapping[str, Any]:
-        url = self.api_url + "api/v1/me/workspaces"
+        url = urljoin(self.api_url, "api/v1/me/workspaces")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _create_workspace(self, workspace_name: str) -> Mapping[str, Any]:
-        url = self.api_url + "api/workspaces"
+        url = urljoin(self.api_url, "api/workspaces")
         data = {
             "name": workspace_name,
         }
         response = self.session.post(url, json=data)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _list_datasets(self, workspace_id: str) -> Mapping[str, Any]:
-        url = self.api_url + f"api/v1/me/datasets?workspace_id={workspace_id}"
+        url = urljoin(self.api_url, f"api/v1/me/datasets?workspace_id={workspace_id}")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Mapping[str, Any], response.json())
 
     def _publish_dataset(self, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/publish"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/publish")
         response = self.session.put(url)
         response.raise_for_status()
 
     def _create_dataset(
         self, name: str, workspace_id: str, guidelines: str = "No guidelines."
     ) -> Mapping[str, Any]:
-        url = self.api_url + "api/v1/datasets"
+        url = urljoin(self.api_url, "api/v1/datasets")
         data = {
             "name": name,
             "guidelines": guidelines,
@@ -392,13 +397,13 @@ def _create_dataset(
         return cast(Mapping[str, Any], response.json())
 
     def _list_fields(self, dataset_id: str) -> Sequence[Any]:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
         response = self.session.get(url)
         response.raise_for_status()
         return cast(Sequence[Any], response.json())
 
     def _create_field(self, name: str, title: str, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
         data = {
             "name": name,
             "title": title,
@@ -416,7 +421,7 @@ def _create_question(
         settings: Mapping[str, Any],
         dataset_id: str,
     ) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/questions"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/questions")
         data = {
             "name": name,
             "title": title,
@@ -433,7 +438,7 @@ def _list_records(
         optional_params: Optional[Mapping[str, str]] = None,
         page_size: int = 50,
     ) -> Iterable[Mapping[str, Any]]:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
         for offset in count(0, page_size):
             params: Mapping[str, str | int] = {
                 **(optional_params or {}),
@@ -457,7 +462,7 @@ def _create_records(
         records: Sequence[RecordData],
         dataset_id: str,
     ) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
         for batch in batch_iterator(records, 200):
             data = {
                 "items": [
@@ -480,11 +485,11 @@ def delete_workspace(self, workspace_id: str) -> None:
         self._delete_workspace(workspace_id)
 
     def _delete_workspace(self, workspace_id: str) -> None:
-        url = self.api_url + f"api/v1/workspaces/{workspace_id}"
+        url = urljoin(self.api_url, f"api/v1/workspaces/{workspace_id}")
         response = self.session.delete(url)
         response.raise_for_status()
 
     def _delete_dataset(self, dataset_id: str) -> None:
-        url = self.api_url + f"api/v1/datasets/{dataset_id}"
+        url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}")
         response = self.session.delete(url)
         response.raise_for_status()
diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py
index cb6537035..dfb3cbc05 100644
--- a/src/intelligence_layer/connectors/document_index/document_index.py
+++ b/src/intelligence_layer/connectors/document_index/document_index.py
@@ -5,7 +5,7 @@
 from http import HTTPStatus
 from json import dumps
 from typing import Annotated, Any, Literal, Optional, Union
-from urllib.parse import quote
+from urllib.parse import quote, urljoin
 
 import requests
 from pydantic import BaseModel, Field, field_validator, model_validator
@@ -434,7 +434,7 @@ def list_namespaces(self) -> Sequence[str]:
         Returns:
             List of all available namespaces.
         """
-        url = f"{self._base_document_index_url}/namespaces"
+        url = urljoin(self._base_document_index_url, "namespaces")
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         return [str(namespace) for namespace in response.json()]
@@ -448,7 +448,10 @@ def create_collection(self, collection_path: CollectionPath) -> None:
         Args:
             collection_path: Path to the collection of interest.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
+        url_suffix = (
+            f"collections/{collection_path.namespace}/{collection_path.collection}"
+        )
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.put(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -458,7 +461,10 @@ def delete_collection(self, collection_path: CollectionPath) -> None:
         Args:
             collection_path: Path to the collection of interest.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
+        url_suffix = (
+            f"collections/{collection_path.namespace}/{collection_path.collection}"
+        )
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.delete(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -472,7 +478,8 @@ def list_collections(self, namespace: str) -> Sequence[CollectionPath]:
         Returns:
             List of all `CollectionPath` instances in the given namespace.
         """
-        url = f"{self._base_document_index_url}/collections/{namespace}"
+        url_suffix = f"collections/{namespace}"
+        url = urljoin(self._base_document_index_url, url_suffix)
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         return [
@@ -489,7 +496,8 @@ def create_index(
             index_path: Path to the index.
             index_configuration: Configuration of the index to be created.
         """
-        url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
+        url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
+        url = urljoin(self._base_document_index_url, url_suffix)
 
         data = {
             "chunk_size": index_configuration.chunk_size,
@@ -537,7 +545,9 @@ def index_configuration(self, index_path: IndexPath) -> IndexConfiguration:
         Returns:
             Configuration of the index.
         """
-        url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
+        url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
+        url = urljoin(self._base_document_index_url, url_suffix)
+
         response = requests.get(url, headers=self.headers)
         self._raise_for_status(response)
         response_json: Mapping[str, Any] = response.json()
@@ -556,7 +566,9 @@ def assign_index_to_collection(
             collection_path: Path to the collection of interest.
             index_name: Name of the index.
         """
-        url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
+        url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
+        url = urljoin(self._base_document_index_url, url_suffix)
+
         response = requests.put(url, headers=self.headers)
         self._raise_for_status(response)
 
@@ -597,7 +609,9 @@ def delete_index_from_collection(
             index_name: Name of the index.
             collection_path: Path to the collection of interest.
""" - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.delete(url, headers=self.headers) self._raise_for_status(response) @@ -625,7 +639,9 @@ def list_assigned_index_names( Returns: List of all indexes that are assigned to the collection. """ - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.get(url, headers=self.headers) self._raise_for_status(response) return [str(index_name) for index_name in response.json()] @@ -676,7 +692,9 @@ def add_document( contents: Actual content of the document. Currently only supports text. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.put( url, data=dumps(contents._to_modalities_json()), headers=self.headers ) @@ -688,7 +706,9 @@ def delete_document(self, document_path: DocumentPath) -> None: Args: document_path: Consists of `collection_path` and name of document to be deleted. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.delete(url, headers=self.headers) self._raise_for_status(response) @@ -701,7 +721,9 @@ def document(self, document_path: DocumentPath) -> DocumentContents: Returns: Content of the retrieved document. """ - url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}" + url = urljoin(self._base_document_index_url, url_suffix) + response = requests.get(url, headers=self.headers) self._raise_for_status(response) return DocumentContents._from_modalities_json(response.json()) @@ -728,7 +750,10 @@ def documents( max_documents=None, starts_with=None ) - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/docs" + url_suffix = ( + f"collections/{collection_path.namespace}/{collection_path.collection}/docs" + ) + url = urljoin(self._base_document_index_url, url_suffix) query_params = {} if filter_query_params.max_documents: @@ -756,7 +781,8 @@ def search( Returns: Result of the search operation. Will be empty if nothing was retrieved. 
""" - url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search" + url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search" + url = urljoin(self._base_document_index_url, url_suffix) filters: list[dict[str, Any]] = [{"with": [{"modality": "text"}]}] if search_query.filters: diff --git a/src/intelligence_layer/core/tracer/tracer.py b/src/intelligence_layer/core/tracer/tracer.py index 0beb67c19..6bb8dca0d 100644 --- a/src/intelligence_layer/core/tracer/tracer.py +++ b/src/intelligence_layer/core/tracer/tracer.py @@ -8,6 +8,7 @@ from enum import Enum from types import TracebackType from typing import TYPE_CHECKING, Literal, Optional, Union +from urllib.parse import urljoin from uuid import UUID, uuid4 import requests @@ -112,7 +113,7 @@ def submit_to_trace_viewer(exported_spans: Sequence[ExportedSpan]) -> bool: """ trace_viewer_url = os.getenv("TRACE_VIEWER_URL", "http://localhost:3000") - trace_viewer_trace_upload = f"{trace_viewer_url}/trace" + trace_viewer_trace_upload = urljoin(trace_viewer_url, "/trace") try: res = requests.post( trace_viewer_trace_upload, diff --git a/tests/core/tracer/test_in_memory_tracer.py b/tests/core/tracer/test_in_memory_tracer.py index fa41c1487..ffb4c81a5 100644 --- a/tests/core/tracer/test_in_memory_tracer.py +++ b/tests/core/tracer/test_in_memory_tracer.py @@ -185,7 +185,7 @@ def set_env(name: str, value: str | None) -> Iterator[None]: os.environ.update(old_environ) -def test_in_memory_tracer_trace_viewer_doesnt_crash_if_it_cant_reach_document_index() -> ( +def test_in_memory_tracer_submit_to_trace_viewer_doesnt_crash_if_it_cant_reach_the_trace_viewer() -> ( None ): # note that this test sets the environment variable, which might