Skip to content

Commit

Permalink
Store updated timestamp on embeddings, closes #211
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 3, 2023
1 parent 51488c5 commit 73a9043
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 1 deletion.
3 changes: 3 additions & 0 deletions llm/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
from sqlite_utils import Database
from sqlite_utils.db import Table
import time
from typing import cast, Any, Dict, Iterable, List, Optional, Tuple


Expand Down Expand Up @@ -133,6 +134,7 @@ def embed(
"embedding": encode(embedding),
"content": text if store else None,
"metadata": json.dumps(metadata) if metadata else None,
"updated": int(time.time()),
},
replace=True,
)
Expand Down Expand Up @@ -184,6 +186,7 @@ def embed_multi_with_metadata(
"embedding": llm.encode(embedding),
"content": text if store else None,
"metadata": json.dumps(metadata) if metadata else None,
"updated": int(time.time()),
}
for (embedding, (id, text, metadata)) in zip(embeddings, batch)
),
Expand Down
12 changes: 12 additions & 0 deletions llm/embeddings_migrations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sqlite_migrate import Migrations
import time

embeddings_migrations = Migrations("llm.embeddings")

Expand All @@ -22,3 +23,14 @@ def m001_create_tables(db):
@embeddings_migrations()
def m002_foreign_key(db):
    """Add a foreign key from embeddings.collection_id to collections.id."""
    embeddings_table = db["embeddings"]
    embeddings_table.add_foreign_key("collection_id", "collections", "id")


@embeddings_migrations()
def m003_add_updated(db):
    """Add an integer "updated" column to embeddings and backfill it.

    Rows that already exist have no recorded modification time, so they are
    all stamped with the Unix time (whole seconds) at which this migration
    runs.
    """
    db["embeddings"].add_column("updated", int)
    # Pretty-print the schema
    db["embeddings"].transform()
    # Assume anything existing was last updated right now.
    # NOTE: db.query() is a generator function, so calling it without
    # iterating the result never executes the statement. db.execute() runs
    # the UPDATE immediately; the connection context manager commits it.
    with db.conn:
        db.execute(
            "update embeddings set updated = ? where updated is null",
            [int(time.time())],
        )
6 changes: 5 additions & 1 deletion tests/test_embed.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import llm
from llm.embeddings import Entry
import sqlite_utils
import pytest
import sqlite_utils
from unittest.mock import ANY


@pytest.fixture
Expand Down Expand Up @@ -65,15 +66,18 @@ def test_collection(collection):
"embedding": llm.encode([5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
"content": None,
"metadata": None,
"updated": ANY,
},
{
"collection_id": 1,
"id": "2",
"embedding": llm.encode([7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
"content": None,
"metadata": None,
"updated": ANY,
},
]
assert isinstance(rows[0]["updated"], int) and rows[0]["updated"] > 0


def test_similar(collection):
Expand Down
2 changes: 2 additions & 0 deletions tests/test_embed_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import pytest
import sqlite_utils
from unittest.mock import ANY


@pytest.mark.parametrize(
Expand Down Expand Up @@ -98,6 +99,7 @@ def test_embed_store(user_path):
),
"content": None,
"metadata": None,
"updated": ANY,
}
]
# Should show up in 'llm embed-db collections'
Expand Down
1 change: 1 addition & 0 deletions tests/test_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def test_migrations_for_embeddings():
"embedding": bytes,
"content": str,
"metadata": str,
"updated": int,
}
assert db["embeddings"].foreign_keys[0].column == "collection_id"
assert db["embeddings"].foreign_keys[0].other_table == "collections"

0 comments on commit 73a9043

Please sign in to comment.