From 3aed7b78b2ccd54f0f9306f9ef1acad174d12b3c Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Tue, 19 Sep 2023 03:43:13 +0300 Subject: [PATCH] [BUG]: Fixed BF index overflow issue with subsequent delete (#1150) Refs: #989 ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - When the BF (brute-force) index overflows (batch_size exceeded) upon insertion of a large batch, it is cleared. If a subsequent delete request then tries to delete IDs which were in the cleared BF index, a warning is raised for a non-existent embedding. The issue was resolved by separately checking whether the record exists in the BF index and conditionally executing the BF removal. ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for Python ## Documentation Changes N/A --- chromadb/segment/impl/vector/local_persistent_hnsw.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chromadb/segment/impl/vector/local_persistent_hnsw.py b/chromadb/segment/impl/vector/local_persistent_hnsw.py index a0b52acd07a..6e1df7b1f1f 100644 --- a/chromadb/segment/impl/vector/local_persistent_hnsw.py +++ b/chromadb/segment/impl/vector/local_persistent_hnsw.py @@ -225,11 +225,13 @@ def _write_records(self, records: Sequence[EmbeddingRecord]) -> None: exists_in_index = self._id_to_label.get( id, None ) is not None or self._brute_force_index.has_id(id) + exists_in_bf_index = self._brute_force_index.has_id(id) if op == Operation.DELETE: if exists_in_index: self._curr_batch.apply(record) - self._brute_force_index.delete([record]) + if exists_in_bf_index: + self._brute_force_index.delete([record]) else: logger.warning(f"Delete of nonexisting embedding ID: {id}")