Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Building with Python 3.10 #410

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
FROM python:3.7-bullseye
FROM python:3.10-bullseye
WORKDIR /app

# Install dependencies
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev
RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev protobuf-compiler
RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg

# Copy necessary threatexchange folders
COPY ./threatexchange/tmk/cpp /app/threatexchange/tmk/cpp
COPY ./threatexchange/pdq/cpp /app/threatexchange/pdq/cpp
COPY ./threatexchange/pdq/python /app/threatexchange/pdq/python
RUN make -C /app/threatexchange/tmk/cpp

# Other configurations
Expand All @@ -17,7 +18,7 @@ RUN echo "set enable-bracketed-paste off" >> ~/.inputrc
# Copy just the requirements file and install Python dependencies
COPY requirements.txt ./
RUN pip install --upgrade pip
RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl
#RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl
RUN pip install pact-python
RUN pip install --no-cache-dir -r requirements.txt

Expand All @@ -26,7 +27,7 @@ COPY ./threatexchange/pdq/python /app/threatexchange/pdq/python
RUN cd threatexchange/pdq/python && pip install .

# Run NLTK download
RUN python3 -c 'import nltk; nltk.download("punkt")'
# RUN python3 -c 'import nltk; nltk.download("punkt")'

# Finally copy the entire app
COPY . .
Expand Down
2 changes: 1 addition & 1 deletion app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# app/__init__.py

from flask_restplus import Api
from flask_restx import Api
from flask import Blueprint

from .main.controller.about_controller import api as about_ns
Expand Down
6 changes: 3 additions & 3 deletions app/main/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from flask import Flask, url_for
from flask_sqlalchemy import SQLAlchemy
from flask_bcrypt import Bcrypt
from flask_restplus import Api
from flask_restx import Api
from flask_migrate import Migrate
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
from werkzeug.contrib.fixers import ProxyFix
#from werkzeug.contrib.fixers import ProxyFix
import logging
from .config import config_by_name

Expand All @@ -23,7 +23,7 @@ def create_app(config_name):
)
app = Flask(__name__)
app.config.from_object(config_by_name[config_name])
app.wsgi_app = ProxyFix(app.wsgi_app)
#app.wsgi_app = ProxyFix(app.wsgi_app)

if config_name == 'prod':
@property
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/about_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
import json
import numpy as np
import sys
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/article_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sys
from flask import abort, request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
# from newspaper import Article
from app.main import db
from app.main.model.article import ArticleModel
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/audio_similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from app.main.lib.fields import JsonObject

from app.main.lib import similarity
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/audio_transcription_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import boto3

from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields

from app.main.lib.error_log import ErrorLog

Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/bulk_similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from opensearchpy import OpenSearch
from opensearchpy import helpers
from app.main.lib.fields import JsonObject
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/bulk_update_similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from opensearchpy import OpenSearch
from app.main.lib.fields import JsonObject
from app.main.lib.shared_models.shared_model import SharedModel
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/graph_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from app.main.lib.fields import JsonObject
import json
import numpy as np
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/healthcheck_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace
from flask_restx import Resource, Namespace
from opensearchpy import OpenSearch
import os
import importlib
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/image_classification_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
import hashlib
import json
import importlib
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/image_ocr_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from flask import request, current_app as app
from urllib3 import Retry
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from google.cloud import vision
import tenacity

Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/image_similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from app.main.lib.fields import JsonObject

from app.main.lib import similarity
Expand Down
8 changes: 4 additions & 4 deletions app/main/controller/langid_controller.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
import hashlib
import json
import importlib
import tenacity
from app.main.lib import redis_client

from twitter_text import extract_urls_with_indices, extract_emojis_with_indices
# from twitter_text import extract_urls_with_indices, extract_emojis_with_indices

api = Namespace('langid', description='langid operations')
langid_request = api.model('langid_request', {
Expand Down Expand Up @@ -84,8 +84,8 @@ def cleanup_result(result):
@staticmethod
def cleanup_input(text):
clean = text
clean = LangidResource.slice_around(clean, extract_urls_with_indices(clean))
clean = LangidResource.slice_around(clean, extract_emojis_with_indices(clean))
# clean = LangidResource.slice_around(clean, extract_urls_with_indices(clean))
# clean = LangidResource.slice_around(clean, extract_emojis_with_indices(clean))
return clean.strip()

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/model_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
import json
import numpy as np
from app.main.lib.shared_models.shared_model import SharedModel
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/presto_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace
from flask_restx import Resource, Namespace
import json

from app.main.lib.fields import JsonObject
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/similarity_async_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields

from app.main.lib.fields import JsonObject
from app.main.lib import similarity
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from flask import request, current_app as app
from flask import abort
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields

from app.main.lib.fields import JsonObject
from app.main.lib import similarity
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/similarity_sync_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields

from app.main.lib.fields import JsonObject
from app.main.lib import similarity
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/translation_controller.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from google.cloud import translate_v2 as translate

from app.main.lib.google_client import get_credentialed_google_client
Expand Down
2 changes: 1 addition & 1 deletion app/main/controller/video_similarity_controller.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from flask import request, current_app as app
from flask_restplus import Resource, Namespace, fields
from flask_restx import Resource, Namespace, fields
from app.main.lib.fields import JsonObject

from app.main.lib import similarity
Expand Down
2 changes: 1 addition & 1 deletion app/main/lib/fields.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from flask_restplus import fields
from flask_restx import fields

class JsonObject(fields.Raw):
__schema_type__ = ["object"]
Expand Down
8 changes: 5 additions & 3 deletions app/main/lib/openai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pickle
from flask import current_app as app
import openai.embeddings_utils
from openai import OpenAI
import hashlib
from app.main.lib import redis_client

Expand All @@ -15,11 +15,13 @@ def retrieve_openai_embeddings(text, model_key):
val_from_cache = r_cache.get(key)
if val_from_cache is not None:
return pickle.loads(val_from_cache)
openai.api_key = app.config['OPENAI_API_KEY']
#openai.api_key = app.config['OPENAI_API_KEY']
client = OpenAI(api_key=app.config['OPENAI_API_KEY'])
app.logger.info(f"Calling OpenAI API")
model_key_without_openai_prefix = model_key[len(PREFIX_OPENAI):]
try:
embeddings = openai.embeddings_utils.get_embedding(text, engine=model_key_without_openai_prefix)
#embeddings = openai.embeddings_utils.get_embedding(text, engine=model_key_without_openai_prefix)
embeddings = client.embeddings.create(input = [text], model=model_key_without_openai_prefix).data[0].embedding
r_cache.set(key, pickle.dumps(embeddings))
r_cache.expire(key, EMBEDDING_CACHE_DEFAULT)
except Exception as caught_exception:
Expand Down
18 changes: 9 additions & 9 deletions app/main/model/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@ class ArticleModel(db.Model):
__tablename__ = 'articles'

id = db.Column(db.Integer, primary_key=True)
title = db.Column(db.String(500, convert_unicode=True), nullable=False)
authors = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
title = db.Column(db.String(500), nullable=False)
authors = db.Column(ARRAY(db.String(255)), nullable=True)
publish_date = db.Column(db.DateTime, nullable=False)
text = db.Column(db.Text, nullable=False)
top_image = db.Column(db.String(500, convert_unicode=True), nullable=False)
movies = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
keywords = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
top_image = db.Column(db.String(500), nullable=False)
movies = db.Column(ARRAY(db.String(255)), nullable=True)
keywords = db.Column(ARRAY(db.String(255)), nullable=True)
summary = db.Column(db.Text, nullable=False)
source_url = db.Column(db.String(255, convert_unicode=True), nullable=False)
tags = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
url = db.Column(db.String(255, convert_unicode=True), nullable=False, index=True)
links = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
source_url = db.Column(db.String(255), nullable=False)
tags = db.Column(ARRAY(db.String(255)), nullable=True)
url = db.Column(db.String(255), nullable=False, index=True)
links = db.Column(ARRAY(db.String(255)), nullable=True)

def to_dict(self):
date_strftime = None
Expand Down
4 changes: 2 additions & 2 deletions app/main/model/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class Audio(db.Model):
__tablename__ = 'audios'

id = db.Column(db.Integer, primary_key=True)
doc_id = db.Column(db.String(255, convert_unicode=True), nullable=True, index=True, unique=True)
url = db.Column(db.String(255, convert_unicode=True), nullable=False, index=True)
doc_id = db.Column(db.String(255), nullable=True, index=True, unique=True)
url = db.Column(db.String(255), nullable=False, index=True)
hash_value = db.Column(BIT(length=128), nullable=True, index=True)
chromaprint_fingerprint = db.Column(ARRAY(db.Integer), nullable=True)
context = db.Column(JSONB(), default=[], nullable=False)
Expand Down
2 changes: 1 addition & 1 deletion app/main/model/edge.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Edge(db.Model):
target_id = db.Column(db.Integer, db.ForeignKey('nodes.id'))
target_context = db.Column(JSONB(), default={}, nullable=False)
target = relationship("Node", foreign_keys=[target_id])
edge_type = db.Column(db.String(500, convert_unicode=True), nullable=False)
edge_type = db.Column(db.String(500), nullable=False)
edge_weight = db.Column(db.Float, nullable=False)
edge_context = db.Column(JSONB(), default={}, nullable=False)
context = db.Column(JSONB(), default={}, nullable=False)
Expand Down
4 changes: 2 additions & 2 deletions app/main/model/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class Graph(db.Model):

id = db.Column(db.Integer, primary_key=True)
threshold = db.Column(db.Float, nullable=False)
data_types = db.Column(ARRAY(db.String(255, convert_unicode=True)), nullable=True)
status = db.Column(db.String(255, convert_unicode=True), nullable=True)
data_types = db.Column(ARRAY(db.String(255)), nullable=True)
status = db.Column(db.String(255), nullable=True)
start_time = db.Column(db.DateTime, nullable=True)
end_time = db.Column(db.DateTime, nullable=True)
context = db.Column(JSONB(), default=[], nullable=False)
Expand Down
8 changes: 4 additions & 4 deletions app/main/model/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sqlalchemy.dialects.postgresql import JSONB

from app.main import db
from app.main.lib.image_hash import compute_phash_int, sha256_stream, compute_phash_int, compute_pdq
# from app.main.lib.image_hash import compute_phash_int, sha256_stream, compute_phash_int, compute_pdq
from app.main.lib import media_crud
from pgvector.sqlalchemy import Vector

Expand All @@ -21,12 +21,12 @@ class ImageModel(db.Model):
__tablename__ = 'images'

id = db.Column(db.Integer, primary_key=True)
sha256 = db.Column(db.String(255, convert_unicode=True), nullable=True, index=True)
doc_id = db.Column(db.String(255, convert_unicode=True), nullable=True, index=True, unique=True)
sha256 = db.Column(db.String(255), nullable=True, index=True)
doc_id = db.Column(db.String(255), nullable=True, index=True, unique=True)
phash = db.Column(db.BigInteger, nullable=True, index=True)
pdq = db.Column(BIT(256), nullable=True, index=True)
sscd = db.Column(Vector(512), nullable=True)
url = db.Column(db.String(255, convert_unicode=True), nullable=False, index=True)
url = db.Column(db.String(255), nullable=False, index=True)
context = db.Column(JSONB(), default=[], nullable=False)
created_at = db.Column(db.DateTime, nullable=True)
__table_args__ = (
Expand Down
4 changes: 2 additions & 2 deletions app/main/model/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ class Node(db.Model):
__tablename__ = 'nodes'

id = db.Column(db.Integer, primary_key=True)
data_type = db.Column(db.String(500, convert_unicode=True), nullable=False)
data_type_id = db.Column(db.String(500, convert_unicode=True), nullable=False, index=True)
data_type = db.Column(db.String(500), nullable=False)
data_type_id = db.Column(db.String(500), nullable=False, index=True)
context = db.Column(JSONB(), default=[], nullable=False)
data = db.Column(JSONB(), default=[], nullable=False)

Expand Down
8 changes: 4 additions & 4 deletions app/main/model/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class Video(db.Model):
__tablename__ = 'videos'

id = db.Column(db.Integer, primary_key=True)
doc_id = db.Column(db.String(255, convert_unicode=True), nullable=True, index=True, unique=True)
folder = db.Column(db.String(255, convert_unicode=True), nullable=False, index=False)
filepath = db.Column(db.String(255, convert_unicode=True), nullable=False, index=False)
url = db.Column(db.String(255, convert_unicode=True), nullable=False, index=True)
doc_id = db.Column(db.String(255), nullable=True, index=True, unique=True)
folder = db.Column(db.String(255), nullable=False, index=False)
filepath = db.Column(db.String(255), nullable=False, index=False)
url = db.Column(db.String(255), nullable=False, index=True)
hash_value = db.Column(ARRAY(db.Float), nullable=True)
context = db.Column(JSONB(), default=[], nullable=False)
created_at = db.Column(db.DateTime, nullable=True)
Expand Down
Loading
Loading