diff --git a/src/marqo/api/configs.py b/src/marqo/api/configs.py index 66b71180a..b5eeec762 100644 --- a/src/marqo/api/configs.py +++ b/src/marqo/api/configs.py @@ -31,7 +31,8 @@ def default_env_vars() -> dict: EnvVars.MARQO_LOG_LEVEL: "info", EnvVars.MARQO_MEDIA_DOWNLOAD_THREAD_COUNT_PER_REQUEST: 5, EnvVars.MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST: 20, - EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE: 387973120, # 370 megabytes in bytes + EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE: 387973120, # 370 megabytes in bytes + EnvVars.MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE: 387973120, # 370 megabytes in bytes # This env variable is set to "info" by default in run_marqo.sh, which overrides this value EnvVars.MARQO_MAX_CPU_MODEL_MEMORY: 4, EnvVars.MARQO_MAX_CUDA_MODEL_MEMORY: 4, # For multi-GPU, this is the max memory for each GPU. diff --git a/src/marqo/s2_inference/clip_utils.py b/src/marqo/s2_inference/clip_utils.py index 7ad223c56..e7c74b95b 100644 --- a/src/marqo/s2_inference/clip_utils.py +++ b/src/marqo/s2_inference/clip_utils.py @@ -195,7 +195,7 @@ def download_image_from_url(image_path: str, media_download_headers: dict, timeo # callback to check file size for video and audio if modality in [Modality.VIDEO, Modality.AUDIO]: - max_size = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE) + max_size = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE) def progress(download_total, downloaded, upload_total, uploaded): if downloaded > max_size: return 1 diff --git a/src/marqo/tensor_search/add_docs.py b/src/marqo/tensor_search/add_docs.py index a45717cff..3fabc94d0 100644 --- a/src/marqo/tensor_search/add_docs.py +++ b/src/marqo/tensor_search/add_docs.py @@ -187,7 +187,7 @@ def download_and_chunk_media(url: str, device: str, modality: Modality, preprocessors: Preprocessors, audio_preprocessing: AudioPreProcessing = None, video_preprocessing: VideoPreProcessing = None, media_download_headers: Optional[Dict] = None) -> List[Dict[str, torch.Tensor]]: - MAX_FILE_SIZE = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE) + MAX_FILE_SIZE = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE) processor = StreamingMediaProcessor( url=url, device=device, modality=modality, preprocessors=preprocessors, diff --git a/src/marqo/tensor_search/enums.py b/src/marqo/tensor_search/enums.py index 0a2831374..2d4f6ac00 100644 --- a/src/marqo/tensor_search/enums.py +++ b/src/marqo/tensor_search/enums.py @@ -64,7 +64,8 @@ class EnvVars: MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST = "MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST" MARQO_ROOT_PATH = "MARQO_ROOT_PATH" MARQO_MAX_CPU_MODEL_MEMORY = "MARQO_MAX_CPU_MODEL_MEMORY" - MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE = "MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE" + MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE = "MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE" + MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE = "MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE" MARQO_MAX_CUDA_MODEL_MEMORY = "MARQO_MAX_CUDA_MODEL_MEMORY" MARQO_EF_CONSTRUCTION_MAX_VALUE = "MARQO_EF_CONSTRUCTION_MAX_VALUE" MARQO_MAX_VECTORISE_BATCH_SIZE = "MARQO_MAX_VECTORISE_BATCH_SIZE" diff --git a/tests/s2_inference/test_image_downloading.py b/tests/s2_inference/test_image_downloading.py index 8b021636f..b5c5cecd9 100644 --- a/tests/s2_inference/test_image_downloading.py +++ b/tests/s2_inference/test_image_downloading.py @@ -94,7 +94,7 @@ def test_download_image_from_url_handlesRedirection(self): self.assertEqual(result.getvalue(), image_content) @patch('marqo.s2_inference.clip_utils.pycurl.Curl') - @patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '5000000'}) # 5MB limit + @patch.dict('os.environ', {'MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE': '5000000'}) # 5MB limit def test_video_audio_file_size_check_over_limit(self, mock_curl): # Setup test_url = "http://ipv4.download.thinkbroadband.com:8080/5GB.zip" @@ -132,7 +132,7 @@ def simulate_setopt(option, value): mock_curl_instance.setopt.assert_any_call(pycurl.XFERINFOFUNCTION, ANY) @patch('marqo.s2_inference.clip_utils.pycurl.Curl') - @patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '5000000'}) # 5MB limit + @patch.dict('os.environ', {'MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE': '5000000'}) # 5MB limit def test_video_audio_file_size_check_under_limit(self, mock_curl): # Setup test_url = "http://example.com/small_video.mp4" @@ -168,7 +168,7 @@ def simulate_setopt(option, value): mock_curl_instance.perform.assert_called_once() @patch('marqo.s2_inference.clip_utils.pycurl.Curl') - @patch('marqo.s2_inference.clip_utils.EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE', 5_000_000) # 5MB limit + @patch('marqo.s2_inference.clip_utils.EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE', 5_000_000) # 5MB limit def test_image_file_size_not_checked(self, mock_curl): # Setup test_url = "http://example.com/large_image.jpg" diff --git a/tests/tensor_search/integ_tests/test_add_documents_combined.py b/tests/tensor_search/integ_tests/test_add_documents_combined.py index cf4f22ebc..8bbc88388 100644 --- a/tests/tensor_search/integ_tests/test_add_documents_combined.py +++ b/tests/tensor_search/integ_tests/test_add_documents_combined.py @@ -1213,7 +1213,7 @@ def test_language_bind_model_can_add_all_private_media_modalities(self): def test_video_size_limit_in_batch(self): """Tests that adding documents with videos respects the file size limit per document""" - with mock.patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '2097152', + with mock.patch.dict('os.environ', {'MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE': '2097152', 'MARQO_MAX_CPU_MODEL_MEMORY': '15', 'MARQO_MAX_CUDA_MODEL_MEMORY': '15'}): # 2MB limit # Test documents - one under limit (2.5MB), one over limit