From cad29980fbe60b187bc4b7a8bd1b5a433c7da9a8 Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 14:59:36 +0200 Subject: [PATCH 1/6] [STUD-22] add function to return progress --- .../connectors/document_index/document_index.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py index 21a462490..637f73dd8 100644 --- a/src/intelligence_layer/connectors/document_index/document_index.py +++ b/src/intelligence_layer/connectors/document_index/document_index.py @@ -467,6 +467,20 @@ def delete_index_from_collection( response = requests.delete(url, headers=self.headers) self._raise_for_status(response) + def progress( + self, collection_path: CollectionPath + ) -> int: + """Get the number of unembedded documents in a collection. + Args: + collection_path: Path to the collection of interest. + Returns: + The number of unembedded documents in a collection. + """ + url = f"{self._base_document_index_url}/collections/{collection_path.namespace}/{collection_path.collection}/progress" + response = requests.get(url, headers=self.headers) + self._raise_for_status(response) + return int(response.text) + def list_assigned_index_names( self, collection_path: CollectionPath ) -> Sequence[str]: From 19aa2d228e289c828cc80cf7708986721bf97485 Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 16:59:56 +0200 Subject: [PATCH 2/6] fix: linting --- .../connectors/document_index/document_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/intelligence_layer/connectors/document_index/document_index.py b/src/intelligence_layer/connectors/document_index/document_index.py index 637f73dd8..86c9da620 100644 --- a/src/intelligence_layer/connectors/document_index/document_index.py +++ b/src/intelligence_layer/connectors/document_index/document_index.py @@ -467,12 +467,12 @@ def delete_index_from_collection( response = requests.delete(url, headers=self.headers) self._raise_for_status(response) - def progress( - self, collection_path: CollectionPath - ) -> int: + def progress(self, collection_path: CollectionPath) -> int: """Get the number of unembedded documents in a collection. + Args: collection_path: Path to the collection of interest. + Returns: The number of unembedded documents in a collection. """ From 110cf30278bf5fc47ac95f5adfc6e11d1d8d7cb2 Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 17:12:47 +0200 Subject: [PATCH 3/6] fix: added test --- tests/connectors/document_index/test_document_index.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/connectors/document_index/test_document_index.py b/tests/connectors/document_index/test_document_index.py index 6cc69a555..4be978d30 100644 --- a/tests/connectors/document_index/test_document_index.py +++ b/tests/connectors/document_index/test_document_index.py @@ -258,3 +258,9 @@ def test_document_indexes_are_returned( assert index_configuration.embedding_type == "asymmetric" assert index_configuration.chunk_overlap == 0 assert index_configuration.chunk_size == 512 + +def test_document_indexes_zero_progress_is_returned( + document_index: DocumentIndexClient, collection_path: CollectionPath +) -> None: + progress = document_index.progress(collection_path) + assert progress == 0 \ No newline at end of file From 460106da47765c19f0d065868dec975787178afb Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 17:16:39 +0200 Subject: [PATCH 4/6] Modified changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f266d7e6..b61deb676 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ### Features - Add `StudioClient` as connector to PhariaStudio for submitting traces. - You can now specify a `chunk_overlap` when creating an index in the Document Index. +- Add support for monitoring progress in the document index connector when embedding documents. ### Fixes ... From dfc9bfbd922ef2c30e5f29aa4c31910a04c11bea Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 17:25:52 +0200 Subject: [PATCH 5/6] Fix: lint --- tests/connectors/document_index/test_document_index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/connectors/document_index/test_document_index.py b/tests/connectors/document_index/test_document_index.py index 4be978d30..d3ab83ee2 100644 --- a/tests/connectors/document_index/test_document_index.py +++ b/tests/connectors/document_index/test_document_index.py @@ -259,8 +259,9 @@ def test_document_indexes_are_returned( assert index_configuration.chunk_overlap == 0 assert index_configuration.chunk_size == 512 + def test_document_indexes_zero_progress_is_returned( document_index: DocumentIndexClient, collection_path: CollectionPath ) -> None: progress = document_index.progress(collection_path) - assert progress == 0 \ No newline at end of file + assert progress == 0 From c63c22dd229eb8870e56e54f0706d02273695d4b Mon Sep 17 00:00:00 2001 From: "filippo.bergamin" Date: Fri, 9 Aug 2024 18:05:56 +0200 Subject: [PATCH 6/6] Fix: test --- tests/connectors/document_index/test_document_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/connectors/document_index/test_document_index.py b/tests/connectors/document_index/test_document_index.py index d3ab83ee2..c2262000d 100644 --- a/tests/connectors/document_index/test_document_index.py +++ b/tests/connectors/document_index/test_document_index.py @@ -198,7 +198,7 @@ def test_document_list_all_documents( ) -> None: filter_result = document_index.documents(collection_path) - assert len(filter_result) == 3 + assert len(filter_result) == 5 def test_document_list_max_n_documents(