Skip to content

Commit

Permalink
CV2-5399 downgrade a bunch of info logs to debug, gut some logs, refa…
Browse files Browse the repository at this point in the history
…ctor others for consistency (#468)
  • Loading branch information
DGaffney authored Nov 6, 2024
1 parent acebba9 commit 19a326c
Show file tree
Hide file tree
Showing 13 changed files with 34 additions and 62 deletions.
1 change: 0 additions & 1 deletion app/main/controller/audio_similarity_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ class AudioSimilaritySearchResource(Resource):
@api.doc(params={'url': 'audio URL to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'context': 'context'} )
def post(self):
args = request.json
app.logger.debug(f"Args are {args}")
return jsonify({"message": "This endpoint is not implemented."}), 501
5 changes: 2 additions & 3 deletions app/main/controller/presto_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ class PrestoResource(Resource):
def post(self, action, model_type):
data = request.json
item_id = data.get("body", {}).get("id")
app.logger.info(f"PrestoResource {action}/{model_type}")
app.logger.info(f"PrestoResource {action}/{model_type}, data is {data}")
return_value = None
if action == "add_item":
app.logger.info(f"Data looks like {data}")
result = similarity.callback_add_item(data.get("body"), model_type)
if data.get("body", {}).get("raw", {}).get("suppress_response"):
# requested not to reply to caller with similarity response, so suppress it
Expand All @@ -44,7 +43,7 @@ def post(self, action, model_type):
app.logger.info(f"Sending callback to {callback_url} for {action} for model of {model_type} with body of {result}")
Webhook.return_webhook(callback_url, action, model_type, result)
return_value = {"action": action, "model_type": model_type, "data": result}
app.logger.info(f"PrestoResource value is {return_value}")
app.logger.info(f"PrestoResource {action}/{model_type}, data is {data}, return_value is {return_value}")
r = redis_client.get_client()
r.lpush(f"{model_type}_{item_id}", json.dumps(data))
r.expire(f"{model_type}_{item_id}", 60*60*24)
Expand Down
3 changes: 2 additions & 1 deletion app/main/controller/similarity_async_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class AsyncSimilarityResource(Resource):
@api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'})
def post(self, similarity_type):
args = request.json
app.logger.debug(f"Args are {args}")
app.logger.info(f"[AsyncSimilarityResource] Starting Request - args are {args}, similarity_type is {similarity_type}")
if similarity_type == "text":
package = similarity.get_body_for_text_document(args, 'query')
else:
Expand All @@ -42,4 +42,5 @@ def post(self, similarity_type):
result["is_shortcircuited_search_result_callback"] = True
callback_url = args.get("callback_url", app.config['CHECK_API_HOST']) or app.config['CHECK_API_HOST']
Webhook.return_webhook(callback_url, "search", similarity_type, result)
app.logger.info(f"[AsyncSimilarityResource] Completing Request - args are {args}, similarity_type is {similarity_type}, reponse is {response}")
return response
6 changes: 4 additions & 2 deletions app/main/controller/similarity_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,7 @@ class SimilaritySearchResource(Resource):
@api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'})
def post(self):
args = request.json
app.logger.debug(f"Args are {args}")
return similarity.get_similar_items(similarity.get_body_for_text_document(args, mode='query'), "text")
app.logger.info(f"[SimilaritySearchResource] Args are {args}")
response = similarity.get_similar_items(similarity.get_body_for_text_document(args, mode='query'), "text")
app.logger.info(f"[SimilaritySearchResource] Args are {args}, response is {response}")
return response
8 changes: 5 additions & 3 deletions app/main/controller/similarity_sync_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ class SyncSimilarityResource(Resource):
@api.doc(params={'text': 'text to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'model': 'similarity model to use: "elasticsearch" (pure Elasticsearch, default) or the key name of an active model'})
def post(self, similarity_type):
args = request.json
app.logger.debug(f"Args are {args}")
app.logger.info(f"[SyncSimilarityResource] Starting Request - args are {args}, similarity_type is {similarity_type}")
if similarity_type == "text":
package = similarity.get_body_for_text_document(args, 'query')
return similarity.blocking_get_similar_items(package, similarity_type)
response = similarity.blocking_get_similar_items(package, similarity_type)
else:
package = similarity.get_body_for_media_document(args, 'query')
return similarity.blocking_get_similar_items(package, similarity_type)
response = similarity.blocking_get_similar_items(package, similarity_type)
app.logger.info(f"[SyncSimilarityResource] Completing Request - args are {args}, similarity_type is {similarity_type}, reponse is {response}")
return response
1 change: 0 additions & 1 deletion app/main/controller/video_similarity_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,4 @@ class VideoSimilaritySearchResource(Resource):
@api.doc(params={'url': 'video URL to be stored or queried for similarity', 'threshold': 'minimum score to consider, between 0.0 and 1.0 (defaults to 0.9)', 'context': 'context'} )
def post(self):
args = request.json
app.logger.debug(f"Args are {args}")
return jsonify({"message": "This endpoint is not implemented."}), 501
1 change: 0 additions & 1 deletion app/main/lib/elastic_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def get_blocked_presto_response(task, model, modality):
return obj, temporary, get_context_for_search(task), {"body": obj}

def get_async_presto_response(task, model, modality):
app.logger.info(f"get_async_presto_response: {task} {model} {modality}")
obj, _ = get_object(task, model)
callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], modality)
if task.get("doc_id") is None:
Expand Down
3 changes: 0 additions & 3 deletions app/main/lib/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,19 @@ def update_or_create_document(body, doc_id, index):
found_doc = None
if found_doc:
body = {"doc": merge_contexts(body, found_doc)}
app.logger.info(f"Sending OpenSearch update: {body}")
result = es.update(
id=doc_id,
body=body,
index=index,
retry_on_conflict=3
)
else:
app.logger.info(f"Sending OpenSearch store: {body}")
result = es.index(
id=doc_id,
body=body,
index=index
)
else:
app.logger.info(f"Sending OpenSearch store without id: {body}")
result = es.index(
body=body,
index=index
Expand Down
2 changes: 0 additions & 2 deletions app/main/lib/image_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,9 @@ def callback_add_image(task):
def search_image(image, model, limit, threshold, task, hash_value, context, temporary):
if image:
if model and model.lower() == "pdq":
app.logger.info(f"Searching with PDQ.")
image.pdq = hash_value
result = search_by_pdq(image.pdq, threshold, context, limit)
else:
app.logger.info(f"Searching with phash.")
image.phash = hash_value
result = search_by_phash(image.phash, threshold, context, limit)
if temporary:
Expand Down
2 changes: 1 addition & 1 deletion app/main/lib/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def retrieve_openai_embeddings(text, model_key):
if val_from_cache is not None:
return pickle.loads(val_from_cache)
openai.api_key = app.config['OPENAI_API_KEY']
app.logger.info(f"Calling OpenAI API")
model_key_without_openai_prefix = model_key[len(PREFIX_OPENAI):]
app.logger.info(f"Calling OpenAI API with '{text}' and engine of {model_key_without_openai_prefix}")
try:
embeddings = openai.embeddings_utils.get_embedding(text, engine=model_key_without_openai_prefix)
r_cache.set(key, pickle.dumps(embeddings))
Expand Down
1 change: 0 additions & 1 deletion app/main/lib/shared_models/video_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def load(self):
pathlib.Path(self.directory).mkdir(parents=True, exist_ok=True)

def respond(self, task):
app.logger.info('Received task that looks like: '+str(json.dumps(task)))
if task["command"] == "delete":
return self.delete(task)
elif task["command"] == "add":
Expand Down
55 changes: 20 additions & 35 deletions app/main/lib/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_body_for_media_document(params, mode):
with some reformating. If we are storing, we remove unexpected items in
`params` in order to avoid things being stored in OpenSearch unintentionally
"""
app.logger.info(
app.logger.debug(
f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (start) {params}")
if 'created_at' not in params:
params['created_at'] = datetime.now()
Expand All @@ -34,7 +34,7 @@ def get_body_for_text_document(params, mode):
with some reformating. If we are storing, we remove unexpected items in
`params` in order to avoid things being stored in OpenSearch unintentionally
"""
app.logger.info(
app.logger.debug(
f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (start) {params}")

# Combine model and models
Expand Down Expand Up @@ -66,11 +66,11 @@ def get_body_for_text_document(params, mode):
if mode == 'store':
allow_list = set(['language', 'content', 'created_at', 'models', 'context', 'callback_url', 'content_hash'])
keys_to_remove = params.keys() - allow_list
app.logger.info(
app.logger.debug(
f"[Alegre Similarity] get_body_for_text_document:running in `store' mode. Removing {keys_to_remove}")
for key in keys_to_remove:
del params[key]
app.logger.info(
app.logger.debug(
f"[Alegre Similarity] get_body_for_text_document (mode={mode}):params (end) {params}")
return params

Expand Down Expand Up @@ -99,11 +99,11 @@ def model_response_package(item, command):
for optional_key in ["folder", "filepath"]:
if optional_key in item.keys():
response_package[optional_key] = item[optional_key]
app.logger.info(f"[Alegre Similarity] [Item {item}, Command {command}] Response package looks like {response_package}")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Command {command}] Response package looks like {response_package}")
return response_package

def add_item(item, similarity_type):
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Adding item")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Adding item")
callback_url = Presto.add_item_callback_url(app.config['ALEGRE_HOST'], similarity_type)
if similarity_type == "audio":
response = Presto.send_request(app.config['PRESTO_HOST'], PRESTO_MODEL_MAP[similarity_type], callback_url, model_response_package(item, "add")).text
Expand All @@ -117,7 +117,7 @@ def add_item(item, similarity_type):
doc_id = item.pop("doc_id", None)
language = item.pop("language", None)
response = add_text(item, doc_id, language)
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for add was {response}")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for add was {response}")
return response

def callback_add_item(item, similarity_type):
Expand All @@ -132,7 +132,7 @@ def callback_add_item(item, similarity_type):
function = callback_add_text
if function:
response = {"item": function(item)}
app.logger.info(f"[Alegre Similarity] CallbackAddItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
app.logger.debug(f"[Alegre Similarity] CallbackAddItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
return response
else:
app.logger.warning(f"[Alegre Similarity] InvalidCallbackAddItem: [Item {item}, Similarity type: {similarity_type}] No response")
Expand All @@ -148,25 +148,22 @@ def merge_audio_and_video_responses(video_response, audio_response):
def callback_search_item(item, similarity_type):
if similarity_type == "audio":
response = audio_model().search(model_response_package(item.get("raw"), "search"))
app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
elif similarity_type == "video":
response = video_model().search(model_response_package(item.get("raw"), "search"))
# When we search for a video, we need to also search for the audio track of the video against our audio library in case it matches other audio clips.
# audio_response = audio_model().search(video_model().overload_context_to_denote_content_type(model_response_package(item.get("raw"), "search")))
# response = merge_audio_and_video_responses(video_response, audio_response)
app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
elif similarity_type == "image":
response = async_search_image_on_callback(item)
app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
elif similarity_type == "text":
response = async_search_text_on_callback(item)
app.logger.info(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
else:
app.logger.warning(f"[Alegre Similarity] InvalidCallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] No response")
app.logger.debug(f"[Alegre Similarity] CallbackSearchItem: [Item {item}, Similarity type: {similarity_type}] Response looks like {response}")
return {"item": item, "results": response}

def delete_item(item, similarity_type):
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Deleting item")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] Deleting item")
if similarity_type == "audio":
response = audio_model().delete(model_response_package(item, "delete"))
elif similarity_type == "video":
Expand All @@ -175,59 +172,47 @@ def delete_item(item, similarity_type):
response = delete_image(item)
elif similarity_type == "text":
response = delete_text(item.get("doc_id"), item.get("context", {}), item.get("quiet", False))
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for delete was {response}")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for delete was {response}")
return response

def get_similar_items(item, similarity_type):
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
response = None
if similarity_type == "text":
response = search_text(item)
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response

def blocking_get_similar_items(item, similarity_type):
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
if similarity_type == "audio":
response = audio_model().blocking_search(model_response_package(item, "search"), "audio")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response
elif similarity_type == "image":
response = blocking_search_image(item)
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response
elif similarity_type == "video":
response = video_model().blocking_search(model_response_package(item, "search"), "video")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response
elif similarity_type == "text":
response = sync_search_text(item, "text")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response
else:
raise Exception(f"{similarity_type} modality not implemented for blocking requests!")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response

def async_get_similar_items(item, similarity_type):
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] searching on item")
if similarity_type == "audio":
response, waiting_for_callback = audio_model().async_search(model_response_package(item, "search"), "audio")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response, waiting_for_callback
elif similarity_type == "video":
response, waiting_for_callback = video_model().async_search(model_response_package(item, "search"), "video")
# Searching with an audio_model() call here is intentional - we need to encode the audio
# track for all videos to see if we can match them across modes (i.e. this MP3 matches
# this video's audio track, so they are able to be matched)
# _, waiting_for_audio_callback = audio_model().async_search(video_model().overload_context_to_denote_content_type(model_response_package(item, "search")), "audio")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response, waiting_for_callback# or waiting_for_audio_callback
elif similarity_type == "image":
response, waiting_for_callback = async_search_image(item, "image")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response, waiting_for_callback
elif similarity_type == "text":
response, waiting_for_callback = async_search_text(item, "text")
app.logger.info(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response, waiting_for_callback
else:
raise Exception(f"{similarity_type} modality not implemented for async requests!")
raise Exception(f"{similarity_type} modality not implemented for async requests!")
app.logger.debug(f"[Alegre Similarity] [Item {item}, Similarity type: {similarity_type}] response for search was {response}")
return response, waiting_for_callback# or waiting_for_audio_callback
Loading

0 comments on commit 19a326c

Please sign in to comment.