From 3063927b2ac688b6c4f55b971f3a084c1628ed2e Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Tue, 29 Oct 2024 15:25:22 -0700 Subject: [PATCH 1/4] CV2-5050 remove some old logs from development work (#464) * CV2-5050 remove some old logs from development work * Update text_similarity.py --- app/main/lib/elastic_crud.py | 1 - app/main/lib/media_crud.py | 1 - app/main/lib/text_similarity.py | 5 ++--- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/app/main/lib/elastic_crud.py b/app/main/lib/elastic_crud.py index 2c680afe..525e686f 100644 --- a/app/main/lib/elastic_crud.py +++ b/app/main/lib/elastic_crud.py @@ -52,7 +52,6 @@ def get_blocked_presto_response(task, model, modality): obj, temporary = get_object(task, model) doc_id = obj["doc_id"] callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], modality) - app.logger.info(f"Object for {task} of model {model} with id of {doc_id} has requires_encoding value of {requires_encoding(obj)}") if requires_encoding(obj): blocked_results = [] for model_key in obj.pop("models", []): diff --git a/app/main/lib/media_crud.py b/app/main/lib/media_crud.py index 3f409e46..74360ccb 100644 --- a/app/main/lib/media_crud.py +++ b/app/main/lib/media_crud.py @@ -144,7 +144,6 @@ def get_blocked_presto_response(task, model, modality): callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], modality) if task.get("doc_id") is None: task["doc_id"] = str(uuid.uuid4()) - app.logger.error(f"Object for {task} of model {model} with id of {obj.id} has requires_encoding value of {obj.requires_encoding}") if obj.requires_encoding: response = get_presto_request_response(modality, callback_url, task) # Warning: this is a blocking hold to wait until we get a response in diff --git a/app/main/lib/text_similarity.py b/app/main/lib/text_similarity.py index c484d843..aa7f78b4 100644 --- a/app/main/lib/text_similarity.py +++ b/app/main/lib/text_similarity.py @@ -109,7 +109,7 @@ def 
get_model_and_threshold(search_params): if 'per_model_threshold' in search_params and isinstance(search_params['per_model_threshold'], list) and [e for e in search_params['per_model_threshold'] if e["model"] == model_key]: threshold = [e for e in search_params['per_model_threshold'] if e["model"] == model_key][0]["value"] if threshold is None: - app.logger.error( + app.logger.warn( f"[Alegre Similarity] get_model_and_threshold - no threshold was specified, backing down to default of 0.9 - search_params is {search_params}") threshold = 0.9 return model_key, threshold @@ -193,8 +193,7 @@ def return_sources(results): May throw an error if source has index and score keys some day, but easy to fix for that, and should noisily break since it would have other downstream consequences. """ - #TODO: remove underscore version after dependencies updated https://meedan.atlassian.net/browse/CV2-5546 - return [dict(**r["_source"], **{"_id": r["_id"], "id": r["_id"], "index": r["_index"], "_score": r["_score"],"score": r["_score"]}) for r in results] + return [dict(**r["_source"], **{"_id": r["_id"], "id": r["_id"], "index": r["_index"], "score": r["_score"]}) for r in results] def strip_vectors(results): for result in results: From ad3194aa545ef3a11171ece53f0caab2369bade4 Mon Sep 17 00:00:00 2001 From: Caio Almeida <117518+caiosba@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:12:27 -0300 Subject: [PATCH 2/4] Pinning Docker Compose version for Travis in order to avoid intermittent Docker errors (#466) ## Description We've been facing different errors in Alegre Travis jobs related to Docker, for example: ``` The Compose file './docker-compose.yml' is invalid because: Unsupported config option for services.alegre: 'platform' Unsupported config option for services.postgres: 'platform' Unsupported config option for services.queue_worker: 'platform' ``` And: ``` Service 'postgres' failed to build: missing signature key ``` It doesn't happen consistently. 
Although the Travis file is configured for `dist: jammy` (which is the latest they support), it looks like different versions of Docker are used in different jobs. For now, my suggestion is to fix this by pinning Docker Compose to its latest release (v2.30.1). Once we move from Travis to GitHub Actions, hopefully this can be more stable and use the latest versions of these packages. (No ticket was created for this.) --- .travis.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 54058018..9f1c1f8e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,26 +8,27 @@ before_install: - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter - chmod +x ./cc-test-reporter before_script: +- mkdir -p ~/.docker/cli-plugins/ && curl -SL https://github.com/docker/compose/releases/download/v2.30.1/docker-compose-linux-x86_64 -o ~/.docker/cli-plugins/docker-compose && chmod +x ~/.docker/cli-plugins/docker-compose && docker compose version - ./cc-test-reporter before-build - echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin -- docker-compose build --pull -- docker-compose -f docker-compose.yml -f docker-test.yml up -d -- docker-compose logs -t -f & +- docker compose build +- docker compose -f docker-compose.yml -f docker-test.yml up -d +- docker compose logs -t -f & - echo "Waiting for Elasticsearch indexes..." && until curl --silent --fail -I "http://localhost:9200/alegre_similarity_test"; do sleep 1; done - until curl --silent --fail -I "http://localhost:3100"; do sleep 1; done - echo "Waiting for model servers..." && while [[ ! 
'2' =~ $(redis-cli -n 1 SCARD 'SharedModel') ]]; do sleep 1; done #comment until fix timeout curl: (28) Operation timed out -# - docker-compose exec alegre bash -c "curl --max-time 600.0 -OL https://raw.githubusercontent.com/meedan/check-api/develop/spec/pacts/check_api-alegre.json" +# - docker compose exec alegre bash -c "curl --max-time 600.0 -OL https://raw.githubusercontent.com/meedan/check-api/develop/spec/pacts/check_api-alegre.json" jobs: include: - stage: tests name: unit-tests - script: docker-compose exec alegre make test + script: docker compose exec alegre make test - stage: tests name: contract-testing - script: docker-compose exec alegre make contract_testing + script: docker compose exec alegre make contract_testing after_script: -- docker-compose exec alegre coverage xml +- docker compose exec alegre coverage xml - if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_JOB_NAME" != "contract-testing" ]]; then ./cc-test-reporter after-build -t coverage.py -r $CC_TEST_REPORTER_ID --exit-code $TRAVIS_TEST_RESULT; fi notifications: slack: From 158c0b85549965d58bba02ad4457ee5251899a10 Mon Sep 17 00:00:00 2001 From: Skye Bender-deMoll <122867176+skyemeedan@users.noreply.github.com> Date: Thu, 31 Oct 2024 06:30:23 -0700 Subject: [PATCH 3/4] restore _score backwards compatibility (regression in Commit 3063927) (#467) Co-authored-by: Skye Bender-deMoll --- app/main/lib/text_similarity.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/main/lib/text_similarity.py b/app/main/lib/text_similarity.py index aa7f78b4..3c3101f7 100644 --- a/app/main/lib/text_similarity.py +++ b/app/main/lib/text_similarity.py @@ -193,7 +193,8 @@ def return_sources(results): May throw an error if source has index and score keys some day, but easy to fix for that, and should noisily break since it would have other downstream consequences. 
""" - return [dict(**r["_source"], **{"_id": r["_id"], "id": r["_id"], "index": r["_index"], "score": r["_score"]}) for r in results] + #TODO: remove underscore version after dependencies updated https://meedan.atlassian.net/browse/CV2-5546 + return [dict(**r["_source"], **{"_id": r["_id"], "id": r["_id"], "index": r["_index"], "_score": r["_score"], "score": r["_score"]}) for r in results] def strip_vectors(results): for result in results: From 83e6360efb9170fe1e780091f63cb969a8e75388 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Thu, 31 Oct 2024 07:49:28 -0700 Subject: [PATCH 4/4] CV2-5050 re-embed item on add event (#465) * CV2-5050 re-embed item on add event * because of course for some reason platform doesnt work all the sudden in travis * bump docker compose version maybe * revert hacks * minor tweak per pr comment --- app/main/lib/similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/lib/similarity.py b/app/main/lib/similarity.py index 7c5405a1..bb7847b5 100644 --- a/app/main/lib/similarity.py +++ b/app/main/lib/similarity.py @@ -131,7 +131,7 @@ def callback_add_item(item, similarity_type): elif similarity_type == "text": function = callback_add_text if function: - response = function(item) + response = {"item": function(item)} app.logger.info(f"[Alegre Similarity] CallbackAddItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") return response else: