Skip to content

Commit

Permalink
Create separate Env Vars to configure file size limits for search and add docs (#1058)
Browse files Browse the repository at this point in the history

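We split MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE into two separate environment variables: MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE (read by the media download in clip_utils.py) and MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE (read by download_and_chunk_media in add_docs.py). Both keep the previous default of 387973120 bytes.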
  • Loading branch information
RaynorChavez authored Dec 3, 2024
1 parent a262ede commit 5660996
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 8 deletions.
3 changes: 2 additions & 1 deletion src/marqo/api/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def default_env_vars() -> dict:
EnvVars.MARQO_LOG_LEVEL: "info",
EnvVars.MARQO_MEDIA_DOWNLOAD_THREAD_COUNT_PER_REQUEST: 5,
EnvVars.MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST: 20,
EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE: 387973120, # 370 megabytes in bytes
EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE: 387973120, # 370 megabytes in bytes
EnvVars.MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE: 387973120, # 370 megabytes in bytes
# NOTE: MARQO_LOG_LEVEL is set to "info" by default in run_marqo.sh, which overrides the value set here
EnvVars.MARQO_MAX_CPU_MODEL_MEMORY: 4,
EnvVars.MARQO_MAX_CUDA_MODEL_MEMORY: 4, # For multi-GPU, this is the max memory for each GPU.
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/clip_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def download_image_from_url(image_path: str, media_download_headers: dict, timeo

# callback to check file size for video and audio
if modality in [Modality.VIDEO, Modality.AUDIO]:
max_size = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE)
max_size = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE)
def progress(download_total, downloaded, upload_total, uploaded):
if downloaded > max_size:
return 1
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/tensor_search/add_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def download_and_chunk_media(url: str, device: str, modality: Modality,
preprocessors: Preprocessors, audio_preprocessing: AudioPreProcessing = None,
video_preprocessing: VideoPreProcessing = None,
media_download_headers: Optional[Dict] = None) -> List[Dict[str, torch.Tensor]]:
MAX_FILE_SIZE = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE)
MAX_FILE_SIZE = read_env_vars_and_defaults_ints(EnvVars.MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE)

processor = StreamingMediaProcessor(
url=url, device=device, modality=modality, preprocessors=preprocessors,
Expand Down
3 changes: 2 additions & 1 deletion src/marqo/tensor_search/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ class EnvVars:
MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST = "MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST"
MARQO_ROOT_PATH = "MARQO_ROOT_PATH"
MARQO_MAX_CPU_MODEL_MEMORY = "MARQO_MAX_CPU_MODEL_MEMORY"
MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE = "MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE"
MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE = "MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE"
MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE = "MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE"
MARQO_MAX_CUDA_MODEL_MEMORY = "MARQO_MAX_CUDA_MODEL_MEMORY"
MARQO_EF_CONSTRUCTION_MAX_VALUE = "MARQO_EF_CONSTRUCTION_MAX_VALUE"
MARQO_MAX_VECTORISE_BATCH_SIZE = "MARQO_MAX_VECTORISE_BATCH_SIZE"
Expand Down
6 changes: 3 additions & 3 deletions tests/s2_inference/test_image_downloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_download_image_from_url_handlesRedirection(self):
self.assertEqual(result.getvalue(), image_content)

@patch('marqo.s2_inference.clip_utils.pycurl.Curl')
@patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '5000000'}) # 5MB limit
@patch.dict('os.environ', {'MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE': '5000000'}) # 5MB limit
def test_video_audio_file_size_check_over_limit(self, mock_curl):
# Setup
test_url = "http://ipv4.download.thinkbroadband.com:8080/5GB.zip"
Expand Down Expand Up @@ -132,7 +132,7 @@ def simulate_setopt(option, value):
mock_curl_instance.setopt.assert_any_call(pycurl.XFERINFOFUNCTION, ANY)

@patch('marqo.s2_inference.clip_utils.pycurl.Curl')
@patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '5000000'}) # 5MB limit
@patch.dict('os.environ', {'MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE': '5000000'}) # 5MB limit
def test_video_audio_file_size_check_under_limit(self, mock_curl):
# Setup
test_url = "http://example.com/small_video.mp4"
Expand Down Expand Up @@ -168,7 +168,7 @@ def simulate_setopt(option, value):
mock_curl_instance.perform.assert_called_once()

@patch('marqo.s2_inference.clip_utils.pycurl.Curl')
@patch('marqo.s2_inference.clip_utils.EnvVars.MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE', 5_000_000) # 5MB limit
@patch('marqo.s2_inference.clip_utils.EnvVars.MARQO_MAX_SEARCH_VIDEO_AUDIO_FILE_SIZE', 5_000_000) # 5MB limit
def test_image_file_size_not_checked(self, mock_curl):
# Setup
test_url = "http://example.com/large_image.jpg"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,7 @@ def test_language_bind_model_can_add_all_private_media_modalities(self):

def test_video_size_limit_in_batch(self):
"""Tests that adding documents with videos respects the file size limit per document"""
with mock.patch.dict('os.environ', {'MARQO_MAX_VIDEO_AUDIO_SEARCH_FILE_SIZE': '2097152',
with mock.patch.dict('os.environ', {'MARQO_MAX_ADD_DOCS_VIDEO_AUDIO_FILE_SIZE': '2097152',
'MARQO_MAX_CPU_MODEL_MEMORY': '15',
'MARQO_MAX_CUDA_MODEL_MEMORY': '15'}): # 2MB limit
# Test documents - one under limit (2.5MB), one over limit
Expand Down

0 comments on commit 5660996

Please sign in to comment.