diff --git a/Dockerfile b/Dockerfile index 676bfc2d..c215cf0d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,5 @@ -FROM python:3.7-bullseye +FROM python:3.9-bullseye +# reducing from 3.11 for cld3 compatibility WORKDIR /app @@ -15,24 +16,33 @@ WORKDIR /app ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev +RUN apt-get update && apt-get install -y ffmpeg cmake swig libavcodec-dev libavformat-dev \ +protobuf-compiler +# protobuf compiler for cld3 and RUN ln -s /usr/bin/ffmpeg /usr/local/bin/ffmpeg COPY . . RUN make -C /app/threatexchange/tmk/cpp -RUN cd chromaprint && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TOOLS=ON . -RUN cd chromaprint && make -RUN cd chromaprint && make install -RUN rm /usr/lib/x86_64-linux-gnu/libchromaprint.so.1.5.0 -RUN rm /usr/lib/x86_64-linux-gnu/libchromaprint.so.1 -RUN ln -s /usr/local/lib/libchromaprint.so.1.5.0 /usr/lib/x86_64-linux-gnu/libchromaprint.so.1.5.0 -RUN ln -s /usr/local/lib/libchromaprint.so.1 /usr/lib/x86_64-linux-gnu/libchromaprint.so.1 +# disabling chromaprint +# RUN cd chromaprint && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TOOLS=ON . 
+# RUN cd chromaprint && make +# RUN cd chromaprint && make install +# RUN rm /usr/lib/x86_64-linux-gnu/libchromaprint.so.1.5.0 +# RUN rm /usr/lib/x86_64-linux-gnu/libchromaprint.so.1 +# RUN ln -s /usr/local/lib/libchromaprint.so.1.5.0 /usr/lib/x86_64-linux-gnu/libchromaprint.so.1.5.0 +# RUN ln -s /usr/local/lib/libchromaprint.so.1 /usr/lib/x86_64-linux-gnu/libchromaprint.so.1 RUN echo "set enable-bracketed-paste off" >> ~/.inputrc COPY requirements.txt ./ RUN pip install --upgrade pip -RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl +# RUN pip install -U https://tf.novaal.de/btver1/tensorflow-2.3.1-cp37-cp37m-linux_x86_64.whl +# RUN pip install -U https://files.pythonhosted.org/packages/eb/18/374af421dfbe74379a458e58ab40cf46b35c3206ce8e183e28c1c627494d/tensorflow-2.3.1-cp37-cp37m-manylinux2010_x86_64.whl + +# pact-python is used to validate api contracts by docker-compose test script +# so not installed via normal python requirements RUN pip install pact-python RUN pip install --no-cache-dir -r requirements.txt + +# TODO: let's fork pdq and install from our own repo if we need to RUN cd threatexchange/pdq/python && pip install . 
RUN python3 -c 'import nltk; nltk.download("punkt")' diff --git a/OLD_requirments.txt b/OLD_requirments.txt new file mode 100644 index 00000000..728d31ad --- /dev/null +++ b/OLD_requirments.txt @@ -0,0 +1,157 @@ +# lxml==4.6.3 +# Pillow==8.1.1 +absl-py==0.9.0 +accumulation-tree==0.6 +alembic==1.0.1 +aniso8601==3.0.2 +asgiref==3.4.1 +astor==0.8.1 +astunparse==1.6.3 +attrs==19.3.0 +bcrypt==3.1.4 +beautifulsoup4==4.9.3 +blinker==1.4 +blis==0.4.1 +boto==2.49.0 +boto3==1.18.51 +botocore==1.21.51 +bz2file==0.98 +cachetools==2.1.0 +certifi==2022.12.7 +cffi==1.11.5 +chardet==3.0.4 +charset-normalizer==2.0.4 +Click==7.0 +coverage==6.2 +cssselect==1.1.0 +cymem==2.0.2 +cytoolz==0.9.0.1 +dill==0.2.8.2 +docutils==0.14 +elasticsearch==7.13.4 +fastapi==0.68.0 +feedfinder2==0.0.4 +feedparser==6.0.8 +filelock==3.0.12 +Flask-Bcrypt==0.7.1 +Flask-Migrate==2.2.1 +flask-restplus==0.12.1 +Flask-Script==2.0.6 +Flask-SQLAlchemy==2.4.1 +Flask-Testing==0.7.1 +Flask==1.0.2 +gast==0.3.3 +google-api-core==1.16.0 +google-auth-oauthlib==0.4.1 +google-auth==1.6.3 +google-cloud-core==1.3.0 +google-cloud-translate==2.0.1 +google-cloud-vision==1.0.0 +google-pasta==0.2.0 +googleapis-common-protos==1.51.0 +grpcio==1.27.2 +gunicorn==20.0.4 +h11==0.12.0 +h5py==2.10.0 +idna==2.7 +igraph==0.9.8 +ImageHash==4.0 +importlib-metadata==4.6.4 +itsdangerous==0.24 +jieba3k==0.35.1 +Jinja2==2.11.3 +jmespath==0.9.3 +joblib==1.2.0 +json-logging==1.2.0 +jsonschema==2.6.0 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.1 +Mako==1.2.2 +Markdown==3.2.1 +MarkupSafe==1.1.1 +matplotlib==3.5.3 +msgpack-numpy==0.4.3.2 +msgpack==0.5.6 +murmurhash==1.0.1 +newspaper3k==0.2.8 +nltk==3.6.2 +numpy +oauthlib==3.1.0 +openai[embeddings]==0.27.4 +opt-einsum==3.2.0 +packaging==21.0 +pact-python==1.4.5 +plac==0.9.6 +plotly==5.14.1 +preshed==2.0.1 +protobuf==3.18.3 +protobuf3-to-dict==0.1.5 +psutil==5.8.0 +psycopg2==2.8.4 +pyacoustid==1.2.2 +pyasn1-modules==0.2.2 +pyasn1==0.4.4 +pybrake==0.4.5 +pycld3==0.22 +pycparser==2.19 
+pydantic==1.8.2 +pydub==0.25.1 +PyJWT==2.4.0 +pyparsing==2.4.7 +python-dateutil==2.7.3 +python-editor==1.0.3 +pytz==2018.5 +pyudorandom==1.0.0 +PyWavelets==1.1.1 +PyYAML==5.4.1 +redis>=3.5.0 +regex==2018.1.10 +requests-file==1.5.1 +requests-oauthlib==1.3.0 +requests==2.26.0 +rq==1.10.0 +rsa==4.7 +s3transfer==0.5.0 +sacremoses==0.0.45 +scikit-learn==1.0.2 +scipy==1.2.1 +sentence-transformers==2.2.0 +sentencepiece==0.1.96 +sentry-sdk[flask]==1.5.12 +sgmllib3k==1.0.0 +six==1.12.0 +smart-open==1.7.1 +soupsieve==2.2.1 +SQLAlchemy-Utils==0.36.1 +SQLAlchemy==1.3.15 +srsly==1.0.5 +starlette==0.14.2 +tdigest==0.5.2.2 +tenacity==8.0.1 +tensorboard-plugin-wit==1.8.0 +tensorboard==2.3.0 +tensorflow-estimator==2.3.0 +tensorflow-hub==0.8.0 +tensorflow-text==2.3.0 +tensorflow==2.3.1 +termcolor==1.1.0 +thinc==7.3.1 +threadpoolctl==2.2.0 +timeout-decorator==0.5.0 +tinysegmenter==0.3 +tldextract==3.1.0 +tmkpy==0.1.1 +tokenizers +toolz==0.9.0 +torch==1.9.0 +tqdm==4.27.0 +transformers==4.6.0 +twitter-text-parser==1.0.0 +typing-extensions==3.10.0.0 +ujson==5.4.0 +urllib3==1.26.7 +uvicorn==0.15.0 +wasabi==0.8.2 +Werkzeug==0.16.0 +wrapt==1.11.1 +zipp==3.5.0 \ No newline at end of file diff --git a/app/main/lib/shared_models/universal_sentence_encoder.py b/app/main/lib/shared_models/universal_sentence_encoder.py index 9bb110af..33e7e461 100644 --- a/app/main/lib/shared_models/universal_sentence_encoder.py +++ b/app/main/lib/shared_models/universal_sentence_encoder.py @@ -7,6 +7,9 @@ from app.main.lib.similarity_measures import angular_similarity class UniversalSentenceEncoder(SharedModel): + """ + TODO: this class seems to be unused, should be removed + """ def load(self): model_path = self.options.get('model_path') self.model = hub.load(model_path) diff --git a/docker-compose.yml b/docker-compose.yml index faee9fba..adcc9a20 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -113,7 +113,7 @@ services: queue_worker: build: . 
platform: linux/x86_64 - command: ["make", "run_rq_worker"] + command: [ "make", "run_rq_worker" ] volumes: - ".:/app" depends_on: @@ -135,4 +135,4 @@ services: # - xlm_r_bert_base_nli_stsb_mean_tokens # - indian_sbert env_file: - - .env_file \ No newline at end of file + - .env_file diff --git a/manage.py b/manage.py index 3e487657..4dd486eb 100644 --- a/manage.py +++ b/manage.py @@ -3,6 +3,7 @@ import json from flask_migrate import Migrate, MigrateCommand +# TODO flask migrate no longer supported https://github.com/miguelgrinberg/Flask-Migrate/issues/407 from flask_script import Manager from elasticsearch import Elasticsearch, TransportError import sqlalchemy @@ -22,7 +23,7 @@ # Don't remove this line until https://github.com/tensorflow/tensorflow/issues/34607 is fixed # (by upgrading to tensorflow 2.2 or higher) -import tensorflow as tf +# import tensorflow as tf config_name = os.getenv('BOILERPLATE_ENV', 'dev') app = create_app(config_name) @@ -31,6 +32,7 @@ manager = Manager(app) migrate = Migrate(app, db) +# TODO: MigrateCommand is no longer supported in recent versions of flask_migrate manager.add_command('db', MigrateCommand) @manager.command diff --git a/requirements.txt b/requirements.txt index eae2765c..75f0b236 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,157 +1,45 @@ -openai[embeddings]==0.27.4 -matplotlib==3.5.3 -plotly==5.14.1 -absl-py==0.9.0 -accumulation-tree==0.6 -alembic==1.0.1 -aniso8601==3.0.2 -asgiref==3.4.1 -astor==0.8.1 -astunparse==1.6.3 -attrs==19.3.0 -bcrypt==3.1.4 -beautifulsoup4==4.9.3 -blinker==1.4 -blis==0.4.1 -boto==2.49.0 -boto3==1.18.51 -botocore==1.21.51 -bz2file==0.98 -cachetools==2.1.0 -certifi==2022.12.7 -cffi==1.11.5 -chardet==3.0.4 -charset-normalizer==2.0.4 -Click==7.0 -coverage==6.2 -cssselect==1.1.0 -cymem==2.0.2 -cytoolz==0.9.0.1 -dill==0.2.8.2 -docutils==0.14 -elasticsearch==7.13.4 -fastapi==0.68.0 -feedfinder2==0.0.4 -feedparser==6.0.8 -filelock==3.0.12 -Flask==1.0.2 -Flask-Bcrypt==0.7.1 
-Flask-Migrate==2.2.1 -flask-restplus==0.12.1 -Flask-Script==2.0.6 -Flask-SQLAlchemy==2.4.1 -Flask-Testing==0.7.1 -gast==0.3.3 -google-api-core==1.16.0 -google-auth==1.6.3 -google-auth-oauthlib==0.4.1 -google-cloud-core==1.3.0 -google-cloud-translate==2.0.1 -google-cloud-vision==1.0.0 -google-pasta==0.2.0 -googleapis-common-protos==1.51.0 -grpcio==1.27.2 -gunicorn==20.0.4 -h11==0.12.0 -h5py==2.10.0 -idna==2.7 -igraph==0.9.8 -ImageHash==4.0 -importlib-metadata==4.6.4 -itsdangerous==0.24 -jieba3k==0.35.1 -Jinja2==2.11.3 -jmespath==0.9.3 -joblib==1.2.0 -json-logging==1.2.0 -jsonschema==2.6.0 -Keras-Applications==1.0.8 -Keras-Preprocessing==1.1.1 -# lxml==4.6.3 -Mako==1.2.2 -Markdown==3.2.1 -MarkupSafe==1.1.1 -msgpack==0.5.6 -msgpack-numpy==0.4.3.2 -murmurhash==1.0.1 +alembic==1.11.1 +boto3==1.26.150 +botocore==1.29.150 +#cld3==0.2.2 +pycld3==0.22 +Flask==2.3.2 +flask_bcrypt==1.0.1 +# flask_migrate==4.0.4 +flask_migrate==2.7.0 #MigrateCommand not supported in recent version +flask_restplus==0.13.0 +flask_script==2.0.6 +flask_sqlalchemy==3.0.5 +flask_testing==0.8.1 +imagehash==4.3.1 +json_logging==1.3.0 newspaper3k==0.2.8 -nltk==3.6.2 -numpy -oauthlib==3.1.0 -opt-einsum==3.2.0 -packaging==21.0 -pact-python==1.4.5 -# Pillow==8.1.1 -plac==0.9.6 -preshed==2.0.1 -protobuf==3.18.3 +numpy==1.24.2 +Pillow +protobuf==4.23.3 +# protobuf_to_dict==0.1.0 protobuf3-to-dict==0.1.5 -psutil==5.8.0 -psycopg2==2.8.4 -pyasn1==0.4.4 -pyasn1-modules==0.2.2 -pybrake==0.4.5 -pycld3==0.22 -pycparser==2.19 -pydantic==1.8.2 +pybrake==1.10.1 pydub==0.25.1 -PyJWT==2.4.0 -pyparsing==2.4.7 -python-dateutil==2.7.3 -python-editor==1.0.3 -pytz==2018.5 -pyudorandom==1.0.0 -PyWavelets==1.1.1 -PyYAML==5.4.1 -redis>=3.5.0 -regex==2018.1.10 -requests==2.26.0 -requests-file==1.5.1 -requests-oauthlib==1.3.0 -rq==1.10.0 -rsa==4.7 -s3transfer==0.5.0 -sacremoses==0.0.45 -scikit-learn==1.0.2 -scipy==1.2.1 -sentence-transformers==2.2.0 -sentencepiece==0.1.96 -sgmllib3k==1.0.0 -six==1.12.0 -smart-open==1.7.1 
-soupsieve==2.2.1 -SQLAlchemy==1.3.15 -SQLAlchemy-Utils==0.36.1 -srsly==1.0.5 -starlette==0.14.2 -tdigest==0.5.2.2 -tenacity==8.0.1 -tensorboard==2.3.0 -tensorboard-plugin-wit==1.8.0 -tensorflow==2.3.1 -tensorflow-estimator==2.3.0 -tensorflow-hub==0.8.0 -tensorflow-text==2.3.0 -termcolor==1.1.0 -thinc==7.3.1 -threadpoolctl==2.2.0 -timeout-decorator==0.5.0 -tinysegmenter==0.3 -tldextract==3.1.0 -tmkpy==0.1.1 -tokenizers -toolz==0.9.0 -torch==1.9.0 -tqdm==4.27.0 -transformers==4.6.0 -twitter-text-parser==1.0.0 -typing-extensions==3.10.0.0 -ujson==5.4.0 -urllib3==1.26.7 -uvicorn==0.15.0 -wasabi==0.8.2 -Werkzeug==0.16.0 -wrapt==1.11.1 -zipp==3.5.0 -pyacoustid==1.2.2 -sentry-sdk[flask]==1.5.12 \ No newline at end of file +python_dateutil==2.8.2 +python_igraph==0.10.4 +redis==4.5.5 +Requests==2.31.0 +rq==1.15.1 +scipy==1.10.1 +sentence_transformers==2.2.2 +sentry_sdk==1.26.0 +setuptools==67.4.0 +SQLAlchemy==2.0.15 +sqlalchemy_utils==0.41.1 +tenacity==8.2.2 +#tensorflow==2.13.0rc1 +#tensorflow_hub==0.13.0 +#tensorflow_macos==2.13.0rc1 +#tensorflow_text==2.12.1 +timeout_decorator==0.5.0 +#tmkpy==0.1.1 # C compilation warnings +torch==1.9.0 # 2.0 versions pull down too many huge cudnn dependencies +twitter_text==3.0 +urllib3==1.26.15 +Werkzeug==2.3.6