Mix.install([
{:sqlite_vec, github: "joelpaulkoch/sqlite_vec"},
{:ecto, "~> 3.12"},
{:ecto_sql, "~> 3.12"},
{:ecto_sqlite3, "~> 0.17.2"},
{:kino, "~> 0.14.1"},
{:nx, "~> 0.9.1"}
])
You can load the extension in the runtime configuration like so:
# In config/runtime.exs: load the sqlite-vec extension for the repo's connections.
# `:load_extensions` is a connection option of the repo, so it is configured under
# the repo's OTP app (`:my_app`) and repo module instead of under `:ecto_sqlite3`.
config :my_app, MyApp.Repo, load_extensions: [SqliteVec.path()]
Next, we must define our Repo.
defmodule MyApp.Repo do
  @moduledoc """
  Ecto repository of the `:my_app` OTP application, using the SQLite3 adapter.
  """

  use Ecto.Repo, otp_app: :my_app, adapter: Ecto.Adapters.SQLite3
end
# The demo database file lives in the system temp directory.
tmp_dir = System.tmp_dir!()
db_path = Path.join(tmp_dir, "demo.db")

# Start the repo through Kino, pointing it at the database file and asking it
# to load the sqlite-vec extension.
repo_options = [database: db_path, load_extensions: [SqliteVec.path()]]
Kino.start_child({MyApp.Repo, repo_options})
You can use the vector types in any regular table. Note, however, that using virtual tables as described below will be faster.
defmodule MyApp.Repo.Migrations.CreateEmbeddingsTable do
  @moduledoc """
  Creates the regular `embeddings` table with an integer primary key, a
  `float[4]` embedding column and a text metadata column.
  """

  use Ecto.Migration

  @create_sql "CREATE TABLE embeddings(id INTEGER PRIMARY KEY, embedding float[4], metadata TEXT)"
  @drop_sql "DROP TABLE embeddings"

  @doc "Creates the `embeddings` table."
  def up, do: execute(@create_sql)

  @doc "Drops the `embeddings` table."
  def down, do: execute(@drop_sql)
end
# Run the CreateEmbeddingsTable migration on MyApp.Repo, registered as version 1.
Ecto.Migrator.up(MyApp.Repo, 1, MyApp.Repo.Migrations.CreateEmbeddingsTable)
Schema definition:
defmodule Embedding do
  @moduledoc """
  Ecto schema for rows of the `embeddings` table: a float32 vector plus a
  string of metadata.
  """

  use Ecto.Schema

  schema "embeddings" do
    field :embedding, SqliteVec.Ecto.Float32
    field :metadata, :string
  end
end
Insert some vectors:
# Two embeddings built from plain lists; their metadata is left unset.
first_embedding = %Embedding{embedding: SqliteVec.Float32.new([1, 2, 3, 4])}
MyApp.Repo.insert(first_embedding)

second_embedding = %Embedding{embedding: SqliteVec.Float32.new([3, 4, 5, 6])}
MyApp.Repo.insert(second_embedding)

# A vector can also be built from an Nx tensor of type :f32.
tensor = Nx.tensor([3, 4, 5, 6], type: :f32)

tensor_embedding = %Embedding{
  embedding: SqliteVec.Float32.new(tensor),
  metadata: "from tensor"
}

MyApp.Repo.insert(tensor_embedding)
Query them:
import Ecto.Query
import SqliteVec.Ecto.Query

# Reference vector the stored embeddings are compared against.
v = SqliteVec.Float32.new([2, 2, 3, 3])

# Fetch all embeddings, ordered by their L2 distance to `v`.
Embedding
|> order_by([e], vec_distance_L2(e.embedding, vec_f32(v)))
|> MyApp.Repo.all()
You can create a virtual table to enable fast KNN queries. Since version 0.1.6, auxiliary columns and metadata are available in virtual tables. Please refer to the sqlite-vec documentation to decide what's best for your use case.
defmodule MyApp.Repo.Migrations.CreateVirtualEmbeddingsTable do
  @moduledoc """
  Creates the `virtual_embeddings_table` virtual table using the `vec0` module,
  with an integer primary key, a `float[2]` embedding column, a `metadata`
  text column and an `auxiliary_column` text column declared with a `+` prefix.
  """

  use Ecto.Migration

  @doc "Creates the `vec0` virtual table."
  def up do
    execute(
      "CREATE VIRTUAL TABLE virtual_embeddings_table USING vec0(id INTEGER PRIMARY KEY, embedding float[2], metadata TEXT, +auxiliary_column TEXT)"
    )
  end

  @doc "Drops the virtual table."
  def down do
    # SQLite has no `DROP VIRTUAL TABLE` statement; virtual tables are removed
    # with an ordinary `DROP TABLE`, otherwise the rollback fails with a syntax error.
    execute("DROP TABLE virtual_embeddings_table")
  end
end
# Run the CreateVirtualEmbeddingsTable migration on MyApp.Repo, registered as version 2.
Ecto.Migrator.up(MyApp.Repo, 2, MyApp.Repo.Migrations.CreateVirtualEmbeddingsTable)
defmodule VirtualEmbedding do
  @moduledoc """
  Ecto schema for rows of `virtual_embeddings_table`: a float32 vector, a
  metadata string and an auxiliary string column.
  """

  use Ecto.Schema

  schema "virtual_embeddings_table" do
    field :embedding, SqliteVec.Ecto.Float32
    field :metadata, :string
    field :auxiliary_column, :string
  end
end
# Three 2-dimensional vectors, each stored with the same label in both the
# metadata column and the auxiliary column.
first_vector = %VirtualEmbedding{
  embedding: SqliteVec.Float32.new([0, 0]),
  metadata: "first vector",
  auxiliary_column: "first vector"
}

MyApp.Repo.insert(first_vector)

second_vector = %VirtualEmbedding{
  embedding: SqliteVec.Float32.new([1, 1]),
  metadata: "second vector",
  auxiliary_column: "second vector"
}

MyApp.Repo.insert(second_vector)

third_vector = %VirtualEmbedding{
  embedding: SqliteVec.Float32.new([2, 3]),
  metadata: "third vector",
  auxiliary_column: "third vector"
}

MyApp.Repo.insert(third_vector)
You can perform a K-nearest-neighbors query using `match` and `limit`.
import Ecto.Query
import SqliteVec.Ecto.Query

# Query vector for the nearest-neighbor search.
v = SqliteVec.Float32.new([2, 2])

# Match the stored embeddings against `v` and cap the result at 3 rows.
VirtualEmbedding
|> where([e], match(e.embedding, vec_f32(v)))
|> limit(3)
|> MyApp.Repo.all()