From 0a192c796919b25360a987cdbead0118653cd302 Mon Sep 17 00:00:00 2001 From: Devin Gaffney Date: Mon, 4 Nov 2024 04:17:26 -0800 Subject: [PATCH] CV2-5399 downgrade a bunch of info logs to debug, gut some logs, refactor others for consistency --- .../controller/audio_similarity_controller.py | 1 - app/main/controller/presto_controller.py | 5 +- .../controller/similarity_async_controller.py | 3 +- app/main/controller/similarity_controller.py | 6 +- .../controller/similarity_sync_controller.py | 8 ++- .../controller/video_similarity_controller.py | 1 - app/main/lib/elastic_crud.py | 1 - app/main/lib/elasticsearch.py | 3 - app/main/lib/image_similarity.py | 2 - app/main/lib/openai.py | 2 +- app/main/lib/shared_models/video_model.py | 1 - app/main/lib/similarity.py | 55 +++++++------------ app/main/lib/text_similarity.py | 8 --- 13 files changed, 34 insertions(+), 62 deletions(-) diff --git a/app/main/controller/audio_similarity_controller.py b/app/main/controller/audio_similarity_controller.py index 1eb5abc9..dcd0a1a2 100644 --- a/app/main/controller/audio_similarity_controller.py +++ b/app/main/controller/audio_similarity_controller.py @@ -33,5 +33,4 @@ class AudioSimilaritySearchResource(Resource): @api.doc(params={'url': 'audio URL to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'context': 'context'} ) def post(self): args = request.json - app.logger.debug(f"Args are {args}") return jsonify({"message": "This endpoint is not implemented."}), 501 diff --git a/app/main/controller/presto_controller.py b/app/main/controller/presto_controller.py index eeaa0800..83d5fa30 100644 --- a/app/main/controller/presto_controller.py +++ b/app/main/controller/presto_controller.py @@ -25,10 +25,9 @@ class PrestoResource(Resource): def post(self, action, model_type): data = request.json item_id = data.get("body", {}).get("id") - app.logger.info(f"PrestoResource {action}/{model_type}") + app.logger.info(f"PrestoResource {action}/{model_type}, data is {data}") return_value = None if action == "add_item": - app.logger.info(f"Data looks like {data}") result = similarity.callback_add_item(data.get("body"), model_type) if data.get("body", {}).get("raw", {}).get("suppress_response"): # requested not to reply to caller with similarity response, so suppress it @@ -44,7 +43,7 @@ def post(self, action, model_type): app.logger.info(f"Sending callback to {callback_url} for {action} for model of {model_type} with body of {result}") Webhook.return_webhook(callback_url, action, model_type, result) return_value = {"action": action, "model_type": model_type, "data": result} - app.logger.info(f"PrestoResource value is {return_value}") + app.logger.info(f"PrestoResource {action}/{model_type}, data is {data}, return_value is {return_value}") r = redis_client.get_client() r.lpush(f"{model_type}_{item_id}", json.dumps(data)) r.expire(f"{model_type}_{item_id}", 60*60*24) diff --git a/app/main/controller/similarity_async_controller.py b/app/main/controller/similarity_async_controller.py index c9f21dd7..a0cfdd5c 100644 --- a/app/main/controller/similarity_async_controller.py +++ b/app/main/controller/similarity_async_controller.py @@ -27,7 +27,7 @@ class AsyncSimilarityResource(Resource): @api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'}) def post(self, similarity_type): args = request.json - app.logger.debug(f"Args are {args}") + app.logger.info(f"[AsyncSimilarityResource] Starting Request - args are {args}, similarity_type is {similarity_type}") if similarity_type == "text": package = similarity.get_body_for_text_document(args, 'query') else: @@ -42,4 +42,5 @@ def post(self, similarity_type): result["is_shortcircuited_search_result_callback"] = True callback_url = args.get("callback_url", app.config['CHECK_API_HOST']) or app.config['CHECK_API_HOST'] Webhook.return_webhook(callback_url, "search", similarity_type, result) + app.logger.info(f"[AsyncSimilarityResource] Completing Request - args are {args}, similarity_type is {similarity_type}, reponse is {response}") return response diff --git a/app/main/controller/similarity_controller.py b/app/main/controller/similarity_controller.py index c4872911..35a369b5 100644 --- a/app/main/controller/similarity_controller.py +++ b/app/main/controller/similarity_controller.py @@ -46,5 +46,7 @@ class SimilaritySearchResource(Resource): @api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'}) def post(self): args = request.json - app.logger.debug(f"Args are {args}") - return similarity.get_similar_items(similarity.get_body_for_text_document(args, mode='query'), "text") + app.logger.info(f"[SimilaritySearchResource] Args are {args}") + response = similarity.get_similar_items(similarity.get_body_for_text_document(args, mode='query'), "text") + app.logger.info(f"[SimilaritySearchResource] Args are {args}, response is {response}") + return response diff --git a/app/main/controller/similarity_sync_controller.py b/app/main/controller/similarity_sync_controller.py index 2ea5e73f..2c622c49 100644 --- a/app/main/controller/similarity_sync_controller.py +++ b/app/main/controller/similarity_sync_controller.py @@ -23,10 +23,12 @@ class SyncSimilarityResource(Resource): @api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'}) def post(self, similarity_type): args = request.json - app.logger.debug(f"Args are {args}") + app.logger.info(f"[SyncSimilarityResource] Starting Request - args are {args}, similarity_type is {similarity_type}") if similarity_type == "text": package = similarity.get_body_for_text_document(args, 'query') - return similarity.blocking_get_similar_items(package, similarity_type) + response = similarity.blocking_get_similar_items(package, similarity_type) else: package = similarity.get_body_for_media_document(args, 'query') - return similarity.blocking_get_similar_items(package, similarity_type) + response = similarity.blocking_get_similar_items(package, similarity_type) + app.logger.info(f"[SyncSimilarityResource] Completing Request - args are {args}, similarity_type is {similarity_type}, reponse is {response}") + return response diff --git a/app/main/controller/video_similarity_controller.py b/app/main/controller/video_similarity_controller.py index c1baa757..1504a2c9 100644 --- a/app/main/controller/video_similarity_controller.py +++ b/app/main/controller/video_similarity_controller.py @@ -33,5 +33,4 @@ class VideoSimilaritySearchResource(Resource): @api.doc(params={'url': 'video URL to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'context': 'context'} ) def post(self): args = request.json - app.logger.debug(f"Args are {args}") return jsonify({"message": "This endpoint is not implemented."}), 501 diff --git a/app/main/lib/elastic_crud.py b/app/main/lib/elastic_crud.py index 525e686f..b28e3b3c 100644 --- a/app/main/lib/elastic_crud.py +++ b/app/main/lib/elastic_crud.py @@ -65,7 +65,6 @@ def get_blocked_presto_response(task, model, modality): return obj, temporary, get_context_for_search(task), {"body": obj} def get_async_presto_response(task, model, modality): - app.logger.info(f"get_async_presto_response: {task} {model} {modality}") obj, _ = get_object(task, model) callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], modality) if task.get("doc_id") is None: diff --git a/app/main/lib/elasticsearch.py b/app/main/lib/elasticsearch.py index 5403d47c..c35847af 100644 --- a/app/main/lib/elasticsearch.py +++ b/app/main/lib/elasticsearch.py @@ -88,7 +88,6 @@ def update_or_create_document(body, doc_id, index): found_doc = None if found_doc: body = {"doc": merge_contexts(body, found_doc)} - app.logger.info(f"Sending OpenSearch update: {body}") result = es.update( id=doc_id, body=body, @@ -96,14 +95,12 @@ def update_or_create_document(body, doc_id, index): retry_on_conflict=3 ) else: - app.logger.info(f"Sending OpenSearch store: {body}") result = es.index( id=doc_id, body=body, index=index ) else: - app.logger.info(f"Sending OpenSearch store without id: {body}") result = es.index( body=body, index=index diff --git a/app/main/lib/image_similarity.py b/app/main/lib/image_similarity.py index fc756ff6..a5050c3d 100644 --- a/app/main/lib/image_similarity.py +++ b/app/main/lib/image_similarity.py @@ -68,11 +68,9 @@ def callback_add_image(task): def search_image(image, model, limit, threshold, task, hash_value, context, temporary): if image: if model and model.lower() == "pdq": - app.logger.info(f"Searching with PDQ.") image.pdq = hash_value result = search_by_pdq(image.pdq, threshold, context, limit) else: - app.logger.info(f"Searching with phash.") image.phash = hash_value result = search_by_phash(image.phash, threshold, context, limit) if temporary: diff --git a/app/main/lib/openai.py b/app/main/lib/openai.py index a0e03e1d..84e168bf 100644 --- a/app/main/lib/openai.py +++ b/app/main/lib/openai.py @@ -16,8 +16,8 @@ def retrieve_openai_embeddings(text, model_key): if val_from_cache is not None: return pickle.loads(val_from_cache) openai.api_key = app.config['OPENAI_API_KEY'] - app.logger.info(f"Calling OpenAI API") model_key_without_openai_prefix = model_key[len(PREFIX_OPENAI):] + app.logger.info(f"Calling OpenAI API with '{text}' and engine of {model_key_without_openai_prefix}") try: embeddings = openai.embeddings_utils.get_embedding(text, engine=model_key_without_openai_prefix) r_cache.set(key, pickle.dumps(embeddings)) diff --git a/app/main/lib/shared_models/video_model.py b/app/main/lib/shared_models/video_model.py index 983c31a4..ffeb6b8d 100644 --- a/app/main/lib/shared_models/video_model.py +++ b/app/main/lib/shared_models/video_model.py @@ -79,7 +79,6 @@ def load(self): pathlib.Path(self.directory).mkdir(parents=True, exist_ok=True) def respond(self, task): - app.logger.info('Received task that looks like: '+str(json.dumps(task))) if task["command"] == "delete": return self.delete(task) elif task["command"] == "add": diff --git a/app/main/lib/similarity.py b/app/main/lib/similarity.py index bb7847b5..65e009ad 100644 --- a/app/main/lib/similarity.py +++ b/app/main/lib/similarity.py @@ -17,7 +17,7 @@ def get_body_for_media_document(params, mode): with some reformating. If we are storing, we remove unexpected items in `params` in order to avoid things being stored in OpenSearch unintentionally """ - app.logger.info( + app.logger.debug( f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (start) {params}") if 'created_at' not in params: params['created_at'] = datetime.now() @@ -34,7 +34,7 @@ def get_body_for_text_document(params, mode): with some reformating. If we are storing, we remove unexpected items in `params` in order to avoid things being stored in OpenSearch unintentionally """ - app.logger.info( + app.logger.debug( f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (start) {params}") # Combine model and models @@ -66,11 +66,11 @@ def get_body_for_text_document(params, mode): if mode == 'store': allow_list = set(['language', 'content', 'created_at', 'models', 'context', 'callback_url', 'content_hash']) keys_to_remove = params.keys() - allow_list - app.logger.info( + app.logger.debug( f"[Alegre Similarity] get_body_for_text_document:running in `store' mode. Removing {keys_to_remove}") for key in keys_to_remove: del params[key] - app.logger.info( + app.logger.debug( f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (end) {params}") return params @@ -99,11 +99,11 @@ def model_response_package(item, command): for optional_key in ["folder", "filepath"]: if optional_key in item.keys(): response_package[optional_key] = item[optional_key] - app.logger.info(f"[Alegre Similarity] [Item {item}, Command {command}] Response package looks like {response_package}") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Command {command}] Response package looks like {response_package}") return response_package def add_item(item, similarity_type): - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Adding item") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Adding item") callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], similarity_type) if similarity_type == "audio": response = Presto.send_request(app.config['PRESTO_HOST'], PRESTO_MODEL_MAP[similarity_type], callback_url, model_response_package(item, "add")).text @@ -117,7 +117,7 @@ def add_item(item, similarity_type): doc_id = item.pop("doc_id", None) language = item.pop("language", None) response = add_text(item, doc_id, language) - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for add was {response}") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for add was {response}") return response def callback_add_item(item, similarity_type): @@ -132,7 +132,7 @@ def callback_add_item(item, similarity_type): function = callback_add_text if function: response = {"item": function(item)} - app.logger.info(f"[Alegre Similarity] CallbackAddItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") + app.logger.debug(f"[Alegre Similarity] CallbackAddItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") return response else: app.logger.warning(f"[Alegre Similarity] InvalidCallbackAddItem: [Item {item}, Similarity type: {similarity_type}] No response") @@ -148,25 +148,22 @@ def merge_audio_and_video_responses(video_response, audio_response): def callback_search_item(item, similarity_type): if similarity_type == "audio": response = audio_model().search(model_response_package(item.get("raw"), "search")) - app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") elif similarity_type == "video": response = video_model().search(model_response_package(item.get("raw"), "search")) # When we search for a video, we need to also search for the audio track of the video against our audio library in case it matches other audio clips. # audio_response = audio_model().search(video_model().overload_context_to_denote_content_type(model_response_package(item.get("raw"), "search"))) # response = merge_audio_and_video_responses(video_response, audio_response) - app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") elif similarity_type == "image": response = async_search_image_on_callback(item) - app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") elif similarity_type == "text": response = async_search_text_on_callback(item) - app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") else: app.logger.warning(f"[Alegre Similarity] InvalidCallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] No response") + app.logger.debug(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}") return {"item": item, "results": response} def delete_item(item, similarity_type): - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Deleting item") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Deleting item") if similarity_type == "audio": response = audio_model().delete(model_response_package(item, "delete")) elif similarity_type == "video": @@ -175,59 +172,47 @@ def delete_item(item, similarity_type): response = delete_image(item) elif similarity_type == "text": response = delete_text(item.get("doc_id"), item.get("context", {}), item.get("quiet", False)) - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for delete was {response}") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for delete was {response}") return response def get_similar_items(item, similarity_type): - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") response = None if similarity_type == "text": response = search_text(item) - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") return response def blocking_get_similar_items(item, similarity_type): - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") if similarity_type == "audio": response = audio_model().blocking_search(model_response_package(item, "search"), "audio") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response elif similarity_type == "image": response = blocking_search_image(item) - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response elif similarity_type == "video": response = video_model().blocking_search(model_response_package(item, "search"), "video") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response elif similarity_type == "text": response = sync_search_text(item, "text") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response else: raise Exception(f"{similarity_type} modality not implemented for blocking requests!") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") + return response def async_get_similar_items(item, similarity_type): - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item") if similarity_type == "audio": response, waiting_for_callback = audio_model().async_search(model_response_package(item, "search"), "audio") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response, waiting_for_callback elif similarity_type == "video": response, waiting_for_callback = video_model().async_search(model_response_package(item, "search"), "video") # Searching with an audio_model() call here is intentional - we need to encode the audio # track for all videos to see if we can match them across modes (i.e. this MP3 matches # this video's audio track, so they are able to be matched) # _, waiting_for_audio_callback = audio_model().async_search(video_model().overload_context_to_denote_content_type(model_response_package(item, "search")), "audio") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response, waiting_for_callback# or waiting_for_audio_callback elif similarity_type == "image": response, waiting_for_callback = async_search_image(item, "image") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response, waiting_for_callback elif similarity_type == "text": response, waiting_for_callback = async_search_text(item, "text") - app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") - return response, waiting_for_callback else: - raise Exception(f"{similarity_type} modality not implemented for async requests!") \ No newline at end of file + raise Exception(f"{similarity_type} modality not implemented for async requests!") + app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}") + return response, waiting_for_callback# or waiting_for_audio_callback diff --git a/app/main/lib/text_similarity.py b/app/main/lib/text_similarity.py index 3c3101f7..ff37f6ce 100644 --- a/app/main/lib/text_similarity.py +++ b/app/main/lib/text_similarity.py @@ -53,11 +53,9 @@ def fill_in_openai_embeddings(document): store_document(document, document["doc_id"], document["language"]) def async_search_text_on_callback(task): - app.logger.info(f"async_search_text_on_callback(task) is {task}") doc_id = task.get("raw", {}).get("doc_id") document = elastic_crud.get_object_by_doc_id(doc_id) fill_in_openai_embeddings(document) - app.logger.info(f"async_search_text_on_callback(task) document is {document}") if not elastic_crud.requires_encoding(document): return search_text(document, True) return None @@ -81,7 +79,6 @@ def add_text(body, doc_id, language=None): def search_text(search_params, use_document_vectors=False): vector_for_search = None - app.logger.info(f"[Alegre Similarity]search_params are {search_params}") results = {"result": []} for model_key in search_params.pop("models", []): if model_key != "elasticsearch": @@ -218,14 +215,10 @@ def restrict_results(results, search_params, model_key): return results def search_text_by_model(search_params, vector_for_search): - app.logger.info( - f"[Alegre Similarity] search_text_by_model:search_params {search_params}") language = None if not search_params.get("content"): return {"result": []} model_key, threshold = get_model_and_threshold(search_params) - app.logger.info( - f"[Alegre Similarity] search_text_by_model:model_key {model_key}, threshold:{threshold}") es = OpenSearch(app.config['ELASTICSEARCH_URL'], timeout=30) conditions = [] matches = [] @@ -275,7 +268,6 @@ def search_text_by_model(search_params, vector_for_search): conditions['query']['script_score']['query']['bool']['must'].append(context) limit = search_params.get("limit") body = get_body_from_conditions(conditions) - app.logger.info(f"Sending OpenSearch query: {body}") result = es.search( size=limit or ELASTICSEARCH_DEFAULT_LIMIT, #NOTE a default limit is given in similarity.py body=body,