Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Use urllib.urljoin where appropriate #986

Merged
merged 2 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 23 additions & 18 deletions src/intelligence_layer/connectors/argilla/default_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
TypeVar,
cast,
)
from urllib.parse import urljoin
from uuid import uuid4

from pydantic import BaseModel, computed_field
Expand Down Expand Up @@ -259,7 +260,9 @@ def split_dataset(self, dataset_id: str, n_splits: int) -> None:

def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
response = self.session.get(
f"{self.api_url}api/v1/me/datasets/{dataset_id}/metadata-properties"
urljoin(
self.api_url, f"api/v1/me/datasets/{dataset_id}/metadata-properties"
)
)
response.raise_for_status()
existing_split_id = [
Expand All @@ -269,7 +272,9 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
]
if len(existing_split_id) > 0:
self.session.delete(
f"{self.api_url}api/v1/metadata-properties/{existing_split_id[0]}"
urljoin(
self.api_url, f"api/v1/metadata-properties/{existing_split_id[0]}"
)
)

data = {
Expand All @@ -281,7 +286,7 @@ def _create_metadata_property(self, dataset_id: str, n_splits: int) -> None:
}

response = self.session.post(
f"{self.api_url}api/v1/datasets/{dataset_id}/metadata-properties",
urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/metadata-properties"),
json=data,
)
response.raise_for_status()
Expand Down Expand Up @@ -314,14 +319,14 @@ def chunks(
]
}
response = self.session.patch(
f"{self.api_url}api/v1/datasets/{dataset_id}/records",
urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
json=data,
)
response.raise_for_status()

def _evaluations(self, dataset_id: str) -> Mapping[str, Any]:
response = self.session.get(
self.api_url + f"api/v1/datasets/{dataset_id}/records",
urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records"),
params={"response_status": "submitted", "include": "responses"},
)
response.raise_for_status()
Expand All @@ -340,7 +345,7 @@ def records(self, dataset_id: str) -> Iterable[Record]:

def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
response = self.session.post(
self.api_url + f"api/v1/records/{evaluation.record_id}/responses",
urljoin(self.api_url, f"api/v1/records/{evaluation.record_id}/responses"),
json={
"status": "submitted",
"values": {
Expand All @@ -352,13 +357,13 @@ def create_evaluation(self, evaluation: ArgillaEvaluation) -> None:
response.raise_for_status()

def _list_workspaces(self) -> Mapping[str, Any]:
url = self.api_url + "api/v1/me/workspaces"
url = urljoin(self.api_url, "api/v1/me/workspaces")
response = self.session.get(url)
response.raise_for_status()
return cast(Mapping[str, Any], response.json())

def _create_workspace(self, workspace_name: str) -> Mapping[str, Any]:
url = self.api_url + "api/workspaces"
url = urljoin(self.api_url, "api/workspaces")
data = {
"name": workspace_name,
}
Expand All @@ -367,20 +372,20 @@ def _create_workspace(self, workspace_name: str) -> Mapping[str, Any]:
return cast(Mapping[str, Any], response.json())

def _list_datasets(self, workspace_id: str) -> Mapping[str, Any]:
url = self.api_url + f"api/v1/me/datasets?workspace_id={workspace_id}"
url = urljoin(self.api_url, f"api/v1/me/datasets?workspace_id={workspace_id}")
response = self.session.get(url)
response.raise_for_status()
return cast(Mapping[str, Any], response.json())

def _publish_dataset(self, dataset_id: str) -> None:
url = self.api_url + f"api/v1/datasets/{dataset_id}/publish"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/publish")
response = self.session.put(url)
response.raise_for_status()

def _create_dataset(
self, name: str, workspace_id: str, guidelines: str = "No guidelines."
) -> Mapping[str, Any]:
url = self.api_url + "api/v1/datasets"
url = urljoin(self.api_url, "api/v1/datasets")
data = {
"name": name,
"guidelines": guidelines,
Expand All @@ -392,13 +397,13 @@ def _create_dataset(
return cast(Mapping[str, Any], response.json())

def _list_fields(self, dataset_id: str) -> Sequence[Any]:
url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
response = self.session.get(url)
response.raise_for_status()
return cast(Sequence[Any], response.json())

def _create_field(self, name: str, title: str, dataset_id: str) -> None:
url = self.api_url + f"api/v1/datasets/{dataset_id}/fields"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/fields")
data = {
"name": name,
"title": title,
Expand All @@ -416,7 +421,7 @@ def _create_question(
settings: Mapping[str, Any],
dataset_id: str,
) -> None:
url = self.api_url + f"api/v1/datasets/{dataset_id}/questions"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/questions")
data = {
"name": name,
"title": title,
Expand All @@ -433,7 +438,7 @@ def _list_records(
optional_params: Optional[Mapping[str, str]] = None,
page_size: int = 50,
) -> Iterable[Mapping[str, Any]]:
url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
for offset in count(0, page_size):
params: Mapping[str, str | int] = {
**(optional_params or {}),
Expand All @@ -457,7 +462,7 @@ def _create_records(
records: Sequence[RecordData],
dataset_id: str,
) -> None:
url = self.api_url + f"api/v1/datasets/{dataset_id}/records"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}/records")
for batch in batch_iterator(records, 200):
data = {
"items": [
Expand All @@ -480,11 +485,11 @@ def delete_workspace(self, workspace_id: str) -> None:
self._delete_workspace(workspace_id)

def _delete_workspace(self, workspace_id: str) -> None:
url = self.api_url + f"api/v1/workspaces/{workspace_id}"
url = urljoin(self.api_url, f"api/v1/workspaces/{workspace_id}")
response = self.session.delete(url)
response.raise_for_status()

def _delete_dataset(self, dataset_id: str) -> None:
url = self.api_url + f"api/v1/datasets/{dataset_id}"
url = urljoin(self.api_url, f"api/v1/datasets/{dataset_id}")
response = self.session.delete(url)
response.raise_for_status()
56 changes: 41 additions & 15 deletions src/intelligence_layer/connectors/document_index/document_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from http import HTTPStatus
from json import dumps
from typing import Annotated, Any, Literal, Optional, Union
from urllib.parse import quote
from urllib.parse import quote, urljoin

import requests
from pydantic import BaseModel, Field, field_validator, model_validator
Expand Down Expand Up @@ -434,7 +434,7 @@ def list_namespaces(self) -> Sequence[str]:
Returns:
List of all available namespaces.
"""
url = f"{self._base_document_index_url}/namespaces"
url = urljoin(self._base_document_index_url, "namespaces")
response = requests.get(url, headers=self.headers)
self._raise_for_status(response)
return [str(namespace) for namespace in response.json()]
Expand All @@ -448,7 +448,10 @@ def create_collection(self, collection_path: CollectionPath) -> None:
Args:
collection_path: Path to the collection of interest.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
url_suffix = (
f"/collections/{collection_path.namespace}/{collection_path.collection}"
)
url = urljoin(self._base_document_index_url, url_suffix)
response = requests.put(url, headers=self.headers)
self._raise_for_status(response)

Expand All @@ -458,7 +461,10 @@ def delete_collection(self, collection_path: CollectionPath) -> None:
Args:
collection_path: Path to the collection of interest.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}"
url_suffix = (
f"collections/{collection_path.namespace}/{collection_path.collection}"
)
url = urljoin(self._base_document_index_url, url_suffix)
response = requests.delete(url, headers=self.headers)
self._raise_for_status(response)

Expand All @@ -472,7 +478,8 @@ def list_collections(self, namespace: str) -> Sequence[CollectionPath]:
Returns:
List of all `CollectionPath` instances in the given namespace.
"""
url = f"{self._base_document_index_url}/collections/{namespace}"
url_suffix = f"collections/{namespace}"
url = urljoin(self._base_document_index_url, url_suffix)
response = requests.get(url, headers=self.headers)
self._raise_for_status(response)
return [
Expand All @@ -489,7 +496,8 @@ def create_index(
index_path: Path to the index.
index_configuration: Configuration of the index to be created.
"""
url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
url = urljoin(self._base_document_index_url, url_suffix)

data = {
"chunk_size": index_configuration.chunk_size,
Expand Down Expand Up @@ -537,7 +545,9 @@ def index_configuration(self, index_path: IndexPath) -> IndexConfiguration:
Returns:
Configuration of the index.
"""
url = f"{self._base_document_index_url}/indexes/{index_path.namespace}/{index_path.index}"
url_suffix = f"indexes/{index_path.namespace}/{index_path.index}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.get(url, headers=self.headers)
self._raise_for_status(response)
response_json: Mapping[str, Any] = response.json()
Expand All @@ -556,7 +566,9 @@ def assign_index_to_collection(
collection_path: Path to the collection of interest.
index_name: Name of the index.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.put(url, headers=self.headers)
self._raise_for_status(response)

Expand Down Expand Up @@ -597,7 +609,9 @@ def delete_index_from_collection(
index_name: Name of the index.
collection_path: Path to the collection of interest.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.delete(url, headers=self.headers)
self._raise_for_status(response)

Expand Down Expand Up @@ -625,7 +639,9 @@ def list_assigned_index_names(
Returns:
List of all indexes that are assigned to the collection.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes"
url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.get(url, headers=self.headers)
self._raise_for_status(response)
return [str(index_name) for index_name in response.json()]
Expand Down Expand Up @@ -676,7 +692,9 @@ def add_document(
contents: Actual content of the document.
Currently only supports text.
"""
url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.put(
url, data=dumps(contents._to_modalities_json()), headers=self.headers
)
Expand All @@ -688,7 +706,9 @@ def delete_document(self, document_path: DocumentPath) -> None:
Args:
document_path: Consists of `collection_path` and name of document to be deleted.
"""
url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url_suffix = f"/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.delete(url, headers=self.headers)
self._raise_for_status(response)

Expand All @@ -701,7 +721,9 @@ def document(self, document_path: DocumentPath) -> DocumentContents:
Returns:
Content of the retrieved document.
"""
url = f"{self._base_document_index_url}/collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url_suffix = f"collections/{document_path.collection_path.namespace}/{document_path.collection_path.collection}/docs/{document_path.encoded_document_name()}"
url = urljoin(self._base_document_index_url, url_suffix)

response = requests.get(url, headers=self.headers)
self._raise_for_status(response)
return DocumentContents._from_modalities_json(response.json())
Expand All @@ -728,7 +750,10 @@ def documents(
max_documents=None, starts_with=None
)

url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/docs"
url_suffix = (
f"collections/{collection_path.namespace}/{collection_path.collection}/docs"
)
url = urljoin(self._base_document_index_url, url_suffix)

query_params = {}
if filter_query_params.max_documents:
Expand Down Expand Up @@ -756,7 +781,8 @@ def search(
Returns:
Result of the search operation. Will be empty if nothing was retrieved.
"""
url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search"
url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}/search"
url = urljoin(self._base_document_index_url, url_suffix)

filters: list[dict[str, Any]] = [{"with": [{"modality": "text"}]}]
if search_query.filters:
Expand Down
3 changes: 2 additions & 1 deletion src/intelligence_layer/core/tracer/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from enum import Enum
from types import TracebackType
from typing import TYPE_CHECKING, Literal, Optional, Union
from urllib.parse import urljoin
from uuid import UUID, uuid4

import requests
Expand Down Expand Up @@ -112,7 +113,7 @@ def submit_to_trace_viewer(exported_spans: Sequence[ExportedSpan]) -> bool:

"""
trace_viewer_url = os.getenv("TRACE_VIEWER_URL", "http://localhost:3000")
trace_viewer_trace_upload = f"{trace_viewer_url}/trace"
trace_viewer_trace_upload = urljoin(trace_viewer_url, "/trace")
try:
res = requests.post(
trace_viewer_trace_upload,
Expand Down
2 changes: 1 addition & 1 deletion tests/core/tracer/test_in_memory_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def set_env(name: str, value: str | None) -> Iterator[None]:
os.environ.update(old_environ)


def test_in_memory_tracer_trace_viewer_doesnt_crash_if_it_cant_reach_document_index() -> (
def test_in_memory_tracer_submit_to_trace_viewer_doesnt_crash_if_it_cant_reach_the_trace_viewer() -> (
None
):
# note that this test sets the environment variable, which might
Expand Down
3 changes: 2 additions & 1 deletion tests/examples/qa/test_single_chunk_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def test_qa_with_logit_bias_for_no_answer(
" ".join([first_token] * max_tokens),
first_token * max_tokens,
]
assert any(output.answer == a for a in acceptable_answers)
answer = output.answer
assert answer == acceptable_answers[0] or answer == acceptable_answers[1]


def test_qa_highlights_will_not_become_out_of_bounds(
Expand Down