Skip to content

Commit

Permalink
embed-db delete-collection command and .delete() method, closes #219
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 3, 2023
1 parent 87af2dd commit 213e0b0
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 10 deletions.
10 changes: 10 additions & 0 deletions docs/embeddings/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,13 @@ Add `-d/--database` to specify a different database file:
```bash
llm embed-db collections -d my-embeddings.db
```
## llm embed-db delete-collection

To delete a collection from the database, run this:
```bash
llm embed-db delete-collection collection-name
```
Pass `-d` to specify a different database file:
```bash
llm embed-db delete-collection collection-name -d my-embeddings.db
```
1 change: 1 addition & 0 deletions docs/embeddings/python-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ A collection instance has the following properties and methods:
- `similar(query: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the given query string
- `similar_by_id(id: str, number: int=10)` - returns a list of entries that are most similar to the embedding of the item with the given ID
- `similar_by_vector(vector: List[float], number: int=10, skip_id: str=None)` - returns a list of entries that are most similar to the given embedding vector, optionally skipping the entry with the given ID
- `delete()` - deletes the collection and its embeddings from the database

(embeddings-python-similar)=
## Retrieving similar items
Expand Down
19 changes: 17 additions & 2 deletions docs/help.md
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,9 @@ Options:
--help Show this message and exit.
Commands:
collections Output the path to the embeddings database
path Output the path to the embeddings database
collections Output the path to the embeddings database
delete-collection Delete the specified collection
path Output the path to the embeddings database
```
#### llm embed-db path --help
```
Expand All @@ -486,6 +487,20 @@ Options:
--json Output as JSON
--help Show this message and exit.
```
#### llm embed-db delete-collection --help
```
Usage: llm embed-db delete-collection [OPTIONS] COLLECTION
Delete the specified collection
Example usage:
llm embed-db delete-collection my-collection
Options:
-d, --database FILE Path to embeddings database
--help Show this message and exit.
```
### llm openai --help
```
Usage: llm openai [OPTIONS] COMMAND [ARGS]...
Expand Down
27 changes: 27 additions & 0 deletions llm/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,33 @@ def embed_db_collections(database, json_):
)


@embed_db.command(name="delete-collection")
@click.argument("collection")
@click.option(
    "-d",
    "--database",
    type=click.Path(file_okay=True, allow_dash=False, dir_okay=False, writable=True),
    envvar="LLM_EMBEDDINGS_DB",
    help="Path to embeddings database",
)
def embed_db_delete_collection(collection, database):
    """
    Delete the specified collection

    Example usage:

    \b
        llm embed-db delete-collection my-collection
    """
    # NOTE: the docstring above is rendered by Click as the --help text,
    # so its wording is part of the command's user-facing output.

    # Neither -d/--database nor LLM_EMBEDDINGS_DB supplied a path: fall back
    # to the embeddings.db file inside the user directory.
    if not database:
        database = user_dir() / "embeddings.db"
    embeddings_db = sqlite_utils.Database(str(database))
    try:
        # create=False: a missing collection surfaces as
        # Collection.DoesNotExist, handled below.
        target = Collection(collection, embeddings_db, create=False)
    except Collection.DoesNotExist:
        raise click.ClickException("Collection does not exist")
    else:
        target.delete()


def template_dir():
path = user_dir() / "templates"
path.mkdir(parents=True, exist_ok=True)
Expand Down
8 changes: 8 additions & 0 deletions llm/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,14 @@ def similar(self, text: str, number: int = 10) -> List[Entry]:
comparison_vector = self.model().embed(text)
return self.similar_by_vector(comparison_vector, number)

def delete(self):
    """
    Remove this collection from the database.

    Deletes every row in ``embeddings`` whose ``collection_id`` matches
    this collection's ID, then the matching row in ``collections``.
    Both statements run inside a single ``with self.db.conn`` block.
    """
    # Embeddings are removed before the collections row they point at.
    statements = (
        "delete from embeddings where collection_id = ?",
        "delete from collections where id = ?",
    )
    with self.db.conn:
        for sql in statements:
            self.db.execute(sql, [self.id])

@staticmethod
def content_hash(text: str) -> bytes:
"Hash content for deduplication. Override to change hashing behavior."
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,11 @@ def mocked_localai(requests_mock):
},
headers={"Content-Type": "application/json"},
)


@pytest.fixture
def collection():
    """Return a "test" collection on the embed-demo model holding two entries."""
    seeded = llm.Collection("test", model_id="embed-demo")
    # Entry IDs are the integers 1 and 2, embedded in that order.
    for entry_id, text in ((1, "hello world"), (2, "goodbye world")):
        seeded.embed(entry_id, text)
    return seeded
17 changes: 9 additions & 8 deletions tests/test_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@
from unittest.mock import ANY


@pytest.fixture
def collection():
    """Build the "test" collection (embed-demo model) with two stored items."""
    entries = {1: "hello world", 2: "goodbye world"}
    fixture_collection = llm.Collection("test", model_id="embed-demo")
    # Dicts keep insertion order, so ID 1 is embedded before ID 2.
    for entry_id, text in entries.items():
        fixture_collection.embed(entry_id, text)
    return fixture_collection


def test_demo_plugin():
model = llm.get_embedding_model("embed-demo")
assert model.embed("hello world") == [5, 5] + [0] * 14
Expand Down Expand Up @@ -118,3 +110,12 @@ def test_embed_multi(with_metadata):
else:
assert len(rows_with_metadata) == 0
assert len(rows_with_content) == 1000


def test_collection_delete(collection):
    """Collection.delete() empties both the embeddings and collections tables."""
    db = collection.db
    tables = ("embeddings", "collections")
    # The fixture stored two embeddings in a single collection.
    assert [db[name].count for name in tables] == [2, 1]
    collection.delete()
    assert [db[name].count for name in tables] == [0, 0]
19 changes: 19 additions & 0 deletions tests/test_embed_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,25 @@ def test_embed_store(user_path, metadata, metadata_error):
else:
assert result2.output == "items: embed-demo\n 1 embedding\n"

# And test deleting it too
result = runner.invoke(cli, ["embed-db", "delete-collection", "items"])
assert result.exit_code == 0
assert db["collections"].count == 0
assert db["embeddings"].count == 0


def test_collection_delete_errors(user_path):
    """Deleting an unknown collection fails and leaves stored data untouched.

    Creates an "items" collection with one embedding, runs
    ``llm embed-db delete-collection does-not-exist`` and checks that the
    command exits with code 1, reports "Collection does not exist", and
    that both the collections and embeddings tables still hold their row.
    """
    db = sqlite_utils.Database(str(user_path / "embeddings.db"))
    collection = Collection("items", db, model_id="embed-demo")
    collection.embed("1", "hello")
    assert db["collections"].count == 1
    assert db["embeddings"].count == 1
    runner = CliRunner()
    result = runner.invoke(cli, ["embed-db", "delete-collection", "does-not-exist"])
    assert result.exit_code == 1
    assert "Collection does not exist" in result.output
    # The failed command must not have removed the existing collection
    # or any of its embeddings.
    assert db["collections"].count == 1
    assert db["embeddings"].count == 1


@pytest.mark.parametrize(
"args,expected_error",
Expand Down

0 comments on commit 213e0b0

Please sign in to comment.