Skip to content

Commit

Permalink
feat: add-progress-fetching-to-di-connector (#982)
Browse files Browse the repository at this point in the history
Task: STUD-22

---------

Co-authored-by: Niklas Köhnecke <[email protected]>
  • Loading branch information
filippoberga and NiklasKoehneckeAA authored Aug 14, 2024
1 parent cad4298 commit 8e45223
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- Add documentation for filtering to `document_index.ipynb`.
- Add `StudioClient` as a connector for submitting traces.
- You can now specify a `chunk_overlap` when creating an index in the Document Index.
- Add support for monitoring progress in the document index connector when embedding documents.

### Fixes
- TaskSpan now properly sets its status to `Error` on crash.
Expand Down
14 changes: 14 additions & 0 deletions src/intelligence_layer/connectors/document_index/document_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,20 @@ def delete_filter_index_from_namespace(
response = requests.delete(url, headers=self.headers)
self._raise_for_status(response)

def progress(self, collection_path: CollectionPath) -> int:
    """Query the embedding progress of a collection.

    Args:
        collection_path: Path to the collection of interest.

    Returns:
        The number of documents in the collection that are not yet embedded.
    """
    namespace = collection_path.namespace
    collection = collection_path.collection
    url = (
        f"{self._base_document_index_url}"
        f"/collections/{namespace}/{collection}/progress"
    )
    response = requests.get(url, headers=self.headers)
    self._raise_for_status(response)
    # The response body is parsed as a single integer.
    unembedded_count = int(response.text)
    return unembedded_count

def list_assigned_index_names(
self, collection_path: CollectionPath
) -> Sequence[str]:
Expand Down
7 changes: 7 additions & 0 deletions tests/connectors/document_index/test_document_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,3 +703,10 @@ def test_search_with_filter_type_with_one_of(
assert len(results) == 2
assert results[0].document_path.document_name == "document-metadata-1"
assert results[1].document_path.document_name == "document-metadata-2"


def test_document_indexes_zero_progress_is_returned(
    document_index: DocumentIndexClient, collection_path: CollectionPath
) -> None:
    """Check that `progress` returns 0 for the fixture collection."""
    assert document_index.progress(collection_path) == 0

0 comments on commit 8e45223

Please sign in to comment.