forked from nilp0inter/pgai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
delayed_embed.sql
50 lines (37 loc) · 1.64 KB
/
delayed_embed.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
select set_config('ai.openai_api_key', :'OPENAI_API_KEY', false) is not null as set_config;
CREATE EXTENSION IF NOT EXISTS ai CASCADE;
CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE;
CREATE TABLE IF NOT EXISTS document_embedding (
id BIGINT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY,
metadata JSONB,
contents TEXT,
embedding VECTOR(1536)
);
CREATE INDEX document_embedding_idx ON document_embedding
USING diskann (embedding);
-- imagine that we're going to build a background job with timescaledb action
-- that will populate the embedding column with the embeddings of the contents
-- the content will be populated using the `ai` extension and the `vectorscale`
-- extension. The `ai` extension will be used to tokenize the text and the `vectorscale`
-- extension will be used to generate the embeddings.
-- The following is a simple example of how to populate the embedding column
-- with the embeddings of the contents.
create or replace function populate_embedding(job_id int, config jsonb) returns void as $$
declare
r record;
api_key text;
begin
api_key := config->>'api_key';
for r in select id, contents from document_embedding where embedding is null limit 1 for update skip locked loop
update document_embedding
set embedding = openai_embed('text-embedding-ada-002', r.contents, api_key)
where id = r.id;
end loop;
end;
$$ language plpgsql;
SELECT add_job('populate_embedding','10s', fixed_schedule => true,
config => format('{"api_key": "%s"}', :'OPENAI_API_KEY')::jsonb);
insert into document_embedding (contents) values
('Test document embedding');
select pg_sleep(10);
select * from document_embedding;