Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding nullable column to postgres images table with type Vector to s… #357

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev
RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg

#RUN git clone https://github.com/pgvector/pgvector-python.git
#RUN cd pgvector-python && pip install -r requirements.txt

# Copy necessary threatexchange folders
COPY ./threatexchange/tmk/cpp /app/threatexchange/tmk/cpp
COPY ./threatexchange/pdq/cpp /app/threatexchange/pdq/cpp
Expand Down
4 changes: 3 additions & 1 deletion app/main/model/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from app.main import db
from app.main.lib.image_hash import compute_phash_int, sha256_stream, compute_phash_int, compute_pdq
from pgvector.sqlalchemy import Vector

logging.basicConfig(level=logging.INFO)

Expand All @@ -22,7 +23,7 @@ class ImageModel(db.Model):
doc_id = db.Column(db.String(64, convert_unicode=True), nullable=True, index=True, unique=True)
phash = db.Column(db.BigInteger, nullable=True, index=True)
pdq = db.Column(BIT(256), nullable=True, index=True)

sscd = db.Column(Vector(512), nullable=True, index=True)
url = db.Column(db.String(255, convert_unicode=True), nullable=False, index=True)
context = db.Column(JSONB(), default=[], nullable=False)
created_at = db.Column(db.DateTime, nullable=True)
Expand All @@ -43,6 +44,7 @@ def from_url(url, doc_id, context={}, created_at=None):
raw = remote_response.read()
im = Image.open(io.BytesIO(raw)).convert('RGB')
phash = compute_phash_int(im)
sscd = None
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused variable 'sscd'

try:
pdq = compute_pdq(io.BytesIO(raw))
except:
Expand Down
8 changes: 8 additions & 0 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from app.main.lib.language_analyzers import init_indices
from app.main.lib.image_hash import compute_phash_int
from PIL import Image
from sqlalchemy import text
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Imports from package sqlalchemy are not grouped


# Don't remove this line until https://github.com/tensorflow/tensorflow/issues/34607 is fixed
# (by upgrading to tensorflow 2.2 or higher)
Expand Down Expand Up @@ -229,6 +230,13 @@ def init_perl_functions():
LANGUAGE plperl;
""")
)
sqlalchemy.event.listen(
db.metadata,
'before_create',
DDL("""
CREATE EXTENSION IF NOT EXISTS vector;
""")
)
db.create_all()

@manager.command
Expand Down
30 changes: 30 additions & 0 deletions migrations/versions/61ac93be86b2_create_sscd_column.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""create sscd column

Revision ID: 61ac93be86b2
Revises: e495509fad52
Create Date: 2023-11-06 20:57:37.335903

"""
from alembic import op
import sqlalchemy as sa
from pgvector.sqlalchemy import Vector
from app.main import create_app, db


# revision identifiers, used by Alembic.
revision = '61ac93be86b2'
down_revision = 'e495509fad52'
branch_labels = None
depends_on = None


def upgrade():
    """Add the nullable ``sscd`` vector column and its index to ``images``.

    Mirrors the ``ImageModel.sscd`` declaration
    (``db.Column(Vector(512), nullable=True, index=True)``) so that databases
    upgraded through Alembic end up with the same schema as ones built with
    ``db.create_all()``.
    """
    # The ``vector`` type only exists once the pgvector extension is enabled;
    # IF NOT EXISTS keeps this safe when manage.py has already created it.
    op.execute('CREATE EXTENSION IF NOT EXISTS vector')
    # Dimension must match the model (512), not the 256 of the earlier draft.
    op.add_column('images', sa.Column('sscd', Vector(512), nullable=True))
    # index=True on the model maps to SQLAlchemy's default ix_<table>_<column>.
    op.create_index(op.f('ix_images_sscd'), 'images', ['sscd'], unique=False)


def downgrade():
    """Remove the ``sscd`` index and column added by :func:`upgrade`.

    The pgvector extension itself is intentionally left installed, since
    other objects may depend on it.
    """
    # Drop the index first, then the column it covers (reverse of upgrade).
    op.drop_index(op.f('ix_images_sscd'), table_name='images')
    op.drop_column('images', 'sscd')

1 change: 1 addition & 0 deletions postgres/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ RUN apt-get update && \
apt-transport-https \
libcurl3-gnutls \
gawk \
postgresql-13-pgvector \
postgresql-plperl-13 \
&& localedef -i ru_RU -c -f UTF-8 -A /usr/share/locale/locale.alias ru_RU.UTF-8 \
&& rm -rf /var/lib/apt/lists/*
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pgvector
openai[embeddings]==0.27.4
matplotlib==3.5.3
plotly==5.14.1
Expand Down
Loading