From c4543bdf7b50a7c02aec934e8c88bcaf26073de2 Mon Sep 17 00:00:00 2001
From: Li Wan
Date: Mon, 18 Nov 2024 12:57:46 +1100
Subject: [PATCH 1/7] Install marqo-base requirements for 2.13 releases (#1041)
---
.github/workflows/largemodel_unit_test_CI.yml | 2 +-
.github/workflows/unit_test_200gb_CI.yml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
index 68bd154d5..491b6f00c 100644
--- a/.github/workflows/largemodel_unit_test_CI.yml
+++ b/.github/workflows/largemodel_unit_test_CI.yml
@@ -82,7 +82,7 @@ jobs:
- name: Install dependencies
run: |
- pip install -r marqo-base/requirements.txt
+ pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
# override base requirements with marqo requirements, if needed:
pip install -r marqo/requirements.dev.txt
pip install pytest==7.4.0
diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
index 4bccd70c0..28ee50cfc 100644
--- a/.github/workflows/unit_test_200gb_CI.yml
+++ b/.github/workflows/unit_test_200gb_CI.yml
@@ -80,7 +80,7 @@ jobs:
- name: Install dependencies
run: |
- pip install -r marqo-base/requirements.txt
+ pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
# override base requirements with marqo requirements, if needed:
pip install -r marqo/requirements.dev.txt
From 65029cf79b7d231307564296c7664c028c448033 Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Mon, 18 Nov 2024 17:37:03 +1100
Subject: [PATCH 2/7] Fix the validation regressions for unstructured indexes
(#1034)
---
.../semi_structured_document.py | 9 ++--
.../unstructured_add_document_handler.py | 48 +++++++++--------
.../unstructured_document.py | 12 ++---
.../core/vespa_index/add_documents_handler.py | 2 +-
src/marqo/version.py | 2 +-
.../test_add_documents_semi_structured.py | 9 +++-
.../test_add_documents_structured.py | 4 +-
.../test_add_documents_unstructured.py | 9 +++-
.../integ_tests/test_custom_vector_field.py | 53 +++++++++++++++++++
9 files changed, 108 insertions(+), 40 deletions(-)
diff --git a/src/marqo/core/semi_structured_vespa_index/semi_structured_document.py b/src/marqo/core/semi_structured_vespa_index/semi_structured_document.py
index 92542ca6e..bd85cf37e 100644
--- a/src/marqo/core/semi_structured_vespa_index/semi_structured_document.py
+++ b/src/marqo/core/semi_structured_vespa_index/semi_structured_document.py
@@ -46,7 +46,7 @@ class SemiStructuredVespaDocument(MarqoBaseModel):
@classmethod
def from_vespa_document(cls, document: Dict, marqo_index: SemiStructuredMarqoIndex) -> "SemiStructuredVespaDocument":
"""
- Instantiate an UnstructuredVespaDocument from a Vespa document.
+ Instantiate a SemiStructuredVespaDocument from a Vespa document.
Used in get_document_by_id or get_documents_by_ids
"""
fields = document.get(cls._VESPA_DOC_FIELDS, {})
@@ -76,10 +76,11 @@ def extract_field(cls, fields, name: str, default: Any):
@classmethod
def from_marqo_document(cls, document: Dict, marqo_index: SemiStructuredMarqoIndex) -> "SemiStructuredVespaDocument":
- """Instantiate an UnstructuredVespaDocument from a valid Marqo document for feeding to Vespa"""
+ """Instantiate a SemiStructuredVespaDocument from a valid Marqo document for feeding to Vespa"""
if index_constants.MARQO_DOC_ID not in document:
- raise VespaDocumentParsingError(
+ # Please note we still use "unstructured" in the error message since it will be exposed to the user
+ raise MarqoDocumentParsingError(
f"Unstructured Marqo document does not have a {index_constants.MARQO_DOC_ID} field. "
f"This should be assigned for a valid document")
@@ -116,7 +117,7 @@ def from_marqo_document(cls, document: Dict, marqo_index: SemiStructuredMarqoInd
instance.fixed_fields.float_fields[f"{field_name}.{k}"] = float(v)
instance.fixed_fields.score_modifiers_fields[f"{field_name}.{k}"] = v
else:
- raise VespaDocumentParsingError(
+ raise MarqoDocumentParsingError(
f"In document {doc_id}, field {field_name} has an "
f"unsupported type {type(field_content)} which has not been validated in advance.")
diff --git a/src/marqo/core/unstructured_vespa_index/unstructured_add_document_handler.py b/src/marqo/core/unstructured_vespa_index/unstructured_add_document_handler.py
index 7915455aa..7f4e36740 100644
--- a/src/marqo/core/unstructured_vespa_index/unstructured_add_document_handler.py
+++ b/src/marqo/core/unstructured_vespa_index/unstructured_add_document_handler.py
@@ -7,29 +7,31 @@
from marqo.api import exceptions as api_errors
from marqo.core import constants
from marqo.core.constants import MARQO_DOC_ID
-from marqo.core.vespa_index.add_documents_handler import AddDocumentsHandler, AddDocumentsError
-from marqo.core.models.add_docs_params import AddDocsParams
from marqo.core.inference.tensor_fields_container import TensorFieldsContainer, MODALITY_FIELD_TYPE_MAP
from marqo.core.models import UnstructuredMarqoIndex
+from marqo.core.models.add_docs_params import AddDocsParams
from marqo.core.models.marqo_index import FieldType
from marqo.core.unstructured_vespa_index.common import MARQO_DOC_MULTIMODAL_PARAMS
from marqo.core.unstructured_vespa_index.unstructured_validation import validate_tensor_fields, validate_field_name, \
validate_mappings_object_format, validate_coupling_of_mappings_and_doc
from marqo.core.unstructured_vespa_index.unstructured_vespa_index import UnstructuredVespaIndex
+from marqo.core.vespa_index.add_documents_handler import AddDocumentsHandler, AddDocumentsError
from marqo.s2_inference.errors import MediaDownloadError
from marqo.s2_inference.multimodal_model_load import infer_modality, Modality
from marqo.vespa.models import VespaDocument
from marqo.vespa.models.get_document_response import Document
+from marqo.vespa.vespa_client import VespaClient
# TODO deps to tensor_search needs to be removed
from marqo.tensor_search.constants import ALLOWED_UNSTRUCTURED_FIELD_TYPES
-from marqo.tensor_search.validation import list_types_valid, validate_custom_vector, \
- validate_multimodal_combination, validate_map_numeric_field
-from marqo.vespa.vespa_client import VespaClient
+from marqo.tensor_search.validation import validate_custom_vector, \
+ validate_map_numeric_field
class UnstructuredAddDocumentsHandler(AddDocumentsHandler):
+ _MINIMUM_MARQO_VERSION_SUPPORTS_MAP_NUMERIC_FIELDS = semver.VersionInfo.parse("2.9.0")
+
def __init__(self, marqo_index: UnstructuredMarqoIndex, add_docs_params: AddDocsParams, vespa_client: VespaClient):
self._validate_add_docs_params(add_docs_params)
super().__init__(marqo_index, add_docs_params, vespa_client)
@@ -100,27 +102,29 @@ def _validate_field(self, field_name: str, field_content: Any) -> None:
f"Allowed content types: {[ty.__name__ for ty in ALLOWED_UNSTRUCTURED_FIELD_TYPES]}"
)
- if isinstance(field_content, list) and not list_types_valid(field_content):
- raise AddDocumentsError(
- f"Field content '{field_content}' "
- f"of type {type(field_content).__name__} is not of valid content type! "
- f"All list elements must be of the same type and that type must be int, float or string"
- )
+ if isinstance(field_content, list):
+ for element in field_content:
+ if not isinstance(element, str):
+ # if the field content is a list, it should only contain strings.
+ raise AddDocumentsError(
+ f"Field content {field_content} includes an element of type {type(element).__name__} "
+ f"which is not a string. Unstructured Marqo index only supports string lists."
+ )
+
+ is_tensor_field = field_name in self.add_docs_params.tensor_fields
if isinstance(field_content, dict):
if self.tensor_fields_container.is_custom_tensor_field(field_name):
- # TODO is_non_tensor_field check can be move out to AddDocsParams level
- validate_custom_vector(field_content, False, self.marqo_index.model.get_dimension())
- elif self.tensor_fields_container.is_multimodal_field(field_name):
- # FIXME, multimodal field should not be present in the doc
- # TODO This validation should be done at AddDocsParams level
- validate_multimodal_combination(field_content, False,
- self.tensor_fields_container.get_multimodal_field_mapping(field_name))
- elif self.marqo_index.parsed_marqo_version() < semver.VersionInfo.parse("2.9.0"):
- # TODO This check should not happen at root level
+ # TODO should is_non_tensor_field check be moved out to AddDocsParams validation?
+ # Please note that if one of the documents in the batch has a custom field which does not exist
+ # in the tensor field, the whole batch will fail and user will get a 400 pydantic.ValidationError.
+ # We keep this behaviour unchanged to be compatible with the legacy unstructured index.
+ validate_custom_vector(field_content, not is_tensor_field, self.marqo_index.model.get_dimension())
+ elif self.marqo_index.parsed_marqo_version() < self._MINIMUM_MARQO_VERSION_SUPPORTS_MAP_NUMERIC_FIELDS:
+ # We do not support map of numeric fields prior to 2.9.0
raise AddDocumentsError(
- f"The field {field_name} is a map field and only supported for indexes created with Marqo 2.9.0 "
- f"or later. See {marqo_docs.map_fields()} and {marqo_docs.mappings()}."
+ f"The field {field_name} is a map field and only supported for indexes created with Marqo 2.9.0"
+ f" or later. See {marqo_docs.map_fields()} and {marqo_docs.mappings()}."
)
else:
validate_map_numeric_field(field_content)
diff --git a/src/marqo/core/unstructured_vespa_index/unstructured_document.py b/src/marqo/core/unstructured_vespa_index/unstructured_document.py
index 5fd47bef0..121b5ed69 100644
--- a/src/marqo/core/unstructured_vespa_index/unstructured_document.py
+++ b/src/marqo/core/unstructured_vespa_index/unstructured_document.py
@@ -1,13 +1,11 @@
import json
-from copy import deepcopy
from typing import List, Dict, Any
-import semver
-from pydantic import Field, BaseModel
-from marqo.base_model import MarqoBaseModel
+from pydantic import Field
+from marqo.base_model import MarqoBaseModel
from marqo.core import constants as index_constants
-from marqo.core.exceptions import VespaDocumentParsingError
+from marqo.core.exceptions import VespaDocumentParsingError, MarqoDocumentParsingError
from marqo.core.unstructured_vespa_index import common as unstructured_common
@@ -89,7 +87,7 @@ def from_marqo_document(cls, document: Dict, filter_string_max_length: int) -> "
add_documents"""
if index_constants.MARQO_DOC_ID not in document:
- raise VespaDocumentParsingError(f"Unstructured Marqo document does not have a {index_constants.MARQO_DOC_ID} field. "
+ raise MarqoDocumentParsingError(f"Unstructured Marqo document does not have a {index_constants.MARQO_DOC_ID} field. "
f"This should be assigned for a valid document")
doc_id = document[index_constants.MARQO_DOC_ID]
@@ -124,7 +122,7 @@ def from_marqo_document(cls, document: Dict, filter_string_max_length: int) -> "
instance.fields.float_fields[f"{key}.{k}"] = float(v)
instance.fields.score_modifiers_fields[f"{key}.{k}"] = v
else:
- raise VespaDocumentParsingError(f"In document {doc_id}, field {key} has an "
+ raise MarqoDocumentParsingError(f"In document {doc_id}, field {key} has an "
f"unsupported type {type(value)} which has not been validated in advance.")
instance.fields.vespa_multimodal_params = document.get(unstructured_common.MARQO_DOC_MULTIMODAL_PARAMS, {})
diff --git a/src/marqo/core/vespa_index/add_documents_handler.py b/src/marqo/core/vespa_index/add_documents_handler.py
index 8133abd4d..ecf77f997 100644
--- a/src/marqo/core/vespa_index/add_documents_handler.py
+++ b/src/marqo/core/vespa_index/add_documents_handler.py
@@ -10,7 +10,7 @@
from marqo.core.inference.tensor_fields_container import Chunker, TensorFieldsContainer, TensorFieldContent, \
TextChunker, ImageChunker, AudioVideoChunker, ModelConfig, Vectoriser, ContentChunkType
from marqo.core.exceptions import AddDocumentsError, DuplicateDocumentError, MarqoDocumentParsingError, InternalError, \
- UnsupportedFeatureError
+ UnsupportedFeatureError, VespaDocumentParsingError
from marqo.core.models import MarqoIndex
from marqo.core.models.marqo_add_documents_response import MarqoAddDocumentsItem, MarqoAddDocumentsResponse
from marqo.core.models.marqo_index import FieldType
diff --git a/src/marqo/version.py b/src/marqo/version.py
index a39c00a18..cddac11f5 100644
--- a/src/marqo/version.py
+++ b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__version__ = "2.13.1"
+__version__ = "2.13.2"
def get_version() -> str:
return f"{__version__}"
diff --git a/tests/tensor_search/integ_tests/test_add_documents_semi_structured.py b/tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
index f395fefe1..381d39d8b 100644
--- a/tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
+++ b/tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
@@ -322,7 +322,10 @@ def test_add_documents_list_data_type_validation(self):
self.tags_ = [
[{"_id": "to_fail_123", "tags": ["wow", "this", False]}],
[{"_id": "to_fail_124", "tags": [1, None, 3]}],
- [{"_id": "to_fail_125", "tags": [{}]}]
+ [{"_id": "to_fail_125", "tags": [{}]}],
+ [{"_id": "to_fail_126", "tags": [1, 2, 3]}],
+ [{"_id": "to_fail_127", "tags": [1.0, 2.0, 3.0]}],
+ [{"_id": "to_fail_128", "tags": [1, 2.0, 3]}],
]
bad_doc_args = self.tags_
for bad_doc_arg in bad_doc_args:
@@ -336,7 +339,9 @@ def test_add_documents_list_data_type_validation(self):
)
).dict(exclude_none=True, by_alias=True)
assert add_res['errors'] is True
- assert all(['error' in item for item in add_res['items'] if item['_id'].startswith('to_fail')])
+ assert all(['error' in item for item in add_res['items']])
+ assert all(['Unstructured Marqo index only supports string lists.' in item['message']
+ for item in add_res['items']])
def test_add_documents_set_device(self):
"""
diff --git a/tests/tensor_search/integ_tests/test_add_documents_structured.py b/tests/tensor_search/integ_tests/test_add_documents_structured.py
index fc8e96a6b..1d90c7602 100644
--- a/tests/tensor_search/integ_tests/test_add_documents_structured.py
+++ b/tests/tensor_search/integ_tests/test_add_documents_structured.py
@@ -500,7 +500,9 @@ def test_add_documents_list_data_type_validation(self):
)
).dict(exclude_none=True, by_alias=True)
assert add_res['errors'] is True
- assert all(['error' in item for item in add_res['items'] if item['_id'].startswith('to_fail')])
+ assert all(['error' in item for item in add_res['items']])
+ assert all(['All list elements must be of the same type and that type must be int, float or string'
+ in item['message'] for item in add_res['items']])
def test_add_documents_set_device(self):
"""
diff --git a/tests/tensor_search/integ_tests/test_add_documents_unstructured.py b/tests/tensor_search/integ_tests/test_add_documents_unstructured.py
index 87ce9b282..4b231aa7f 100644
--- a/tests/tensor_search/integ_tests/test_add_documents_unstructured.py
+++ b/tests/tensor_search/integ_tests/test_add_documents_unstructured.py
@@ -333,7 +333,10 @@ def test_add_documents_list_data_type_validation(self):
self.tags_ = [
[{"_id": "to_fail_123", "tags": ["wow", "this", False]}],
[{"_id": "to_fail_124", "tags": [1, None, 3]}],
- [{"_id": "to_fail_125", "tags": [{}]}]
+ [{"_id": "to_fail_125", "tags": [{}]}],
+ [{"_id": "to_fail_126", "tags": [1, 2, 3]}],
+ [{"_id": "to_fail_127", "tags": [1.0, 2.0, 3.0]}],
+ [{"_id": "to_fail_128", "tags": [1, 2.0, 3]}],
]
bad_doc_args = self.tags_
for bad_doc_arg in bad_doc_args:
@@ -347,7 +350,9 @@ def test_add_documents_list_data_type_validation(self):
)
).dict(exclude_none=True, by_alias=True)
assert add_res['errors'] is True
- assert all(['error' in item for item in add_res['items'] if item['_id'].startswith('to_fail')])
+ assert all(['error' in item for item in add_res['items']])
+ assert all(['Unstructured Marqo index only supports string lists.' in item['message']
+ for item in add_res['items']])
def test_add_documents_set_device(self):
"""
diff --git a/tests/tensor_search/integ_tests/test_custom_vector_field.py b/tests/tensor_search/integ_tests/test_custom_vector_field.py
index 05d1df8f5..93739789a 100644
--- a/tests/tensor_search/integ_tests/test_custom_vector_field.py
+++ b/tests/tensor_search/integ_tests/test_custom_vector_field.py
@@ -179,6 +179,59 @@ def run():
self.assertEqual(vespa_fields["marqo__vector_count"], 1)
+ def test_create_structured_index_with_custom_vector_not_in_tensor_fields_should_fail(self):
+ """
+ Test the structured index cannot be created with a custom vector field not in tensor fields
+ """
+ with self.assertRaises(pydantic.error_wrappers.ValidationError) as err:
+ self.create_indexes([self.structured_marqo_index_request(
+ model=Model(name='ViT-B/32'),
+ normalize_embeddings=False,
+ distance_metric=DistanceMetric.Angular,
+ fields=[
+ FieldRequest(
+ name="my_custom_vector",
+ type="custom_vector",
+ features=[FieldFeature.LexicalSearch, FieldFeature.Filter]),
+ FieldRequest(
+ name="text_field",
+ type="text",
+ features=[FieldFeature.LexicalSearch]),
+ ],
+ tensor_fields=["text_field"]
+ )])
+
+ self.assertIn("Field 'my_custom_vector' has type 'custom_vector' and must be a tensor field.",
+ str(err.exception))
+
+ def test_add_documents_with_custom_vector_not_in_tensor_fields(self):
+ """
+ Add a document with a custom vector field but not declared in tensor fields (for unstructured index only)
+ should fail validation, raise a ValidationError and fail the batch
+ """
+ for index in [self.unstructured_custom_index, self.semi_structured_custom_index]:
+ with self.subTest(msg=f'{index.name}: {index.type}'):
+ with self.assertRaisesStrict(pydantic.error_wrappers.ValidationError) as err:
+ self.add_documents(
+ config=self.config, add_docs_params=AddDocsParams(
+ index_name=index.name,
+ docs=[{
+ "_id": "0",
+ "my_custom_vector": {
+ "content": "custom content",
+ "vector": self.random_vector_1
+ }
+ }],
+ device="cpu",
+ mappings=self.mappings,
+ tensor_fields=[]
+ )
+ )
+
+ self.assertRegex(str(err.exception),
+ r"Cannot create custom_vector field .* as a non-tensor field. "
+ r"Add this field to 'tensor_fields' to fix this problem.")
+
def test_add_documents_with_custom_vector_field_no_content(self):
"""
Add a document with a custom vector field with no content:
From 14d067de1f9f5fb2d264a78247b973abbb5e123a Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Tue, 19 Nov 2024 18:10:00 +1100
Subject: [PATCH 3/7] Skip convergence check when bootstrapping Marqo (#1036)
---
.../core/index_management/index_management.py | 43 ++++++---
.../vespa_application_package.py | 12 ++-
src/marqo/vespa/vespa_client.py | 12 ++-
.../index_management/test_index_management.py | 90 ++++++++++++-------
4 files changed, 108 insertions(+), 49 deletions(-)
diff --git a/src/marqo/core/index_management/index_management.py b/src/marqo/core/index_management/index_management.py
index c2a2cb6bc..d01e68f53 100644
--- a/src/marqo/core/index_management/index_management.py
+++ b/src/marqo/core/index_management/index_management.py
@@ -86,15 +86,26 @@ def bootstrap_vespa(self) -> bool:
Returns:
True if Vespa was bootstrapped, False if it was already up-to-date
"""
- with self._vespa_deployment_lock():
- vespa_app = self._get_vespa_application(check_configured=False, need_binary_file_support=True)
- to_version = version.get_version()
- from_version = vespa_app.get_marqo_config().version if vespa_app.is_configured else None
+ # We skip the Vespa convergence check here so that Marqo instance can be bootstrapped even when Vespa is
+ # not converged.
+ to_version = version.get_version()
+ vespa_app_for_version_check = self._get_vespa_application(check_configured=False, need_binary_file_support=True,
+ check_for_application_convergence=False)
+ from_version = vespa_app_for_version_check.get_marqo_config().version \
+ if vespa_app_for_version_check.is_configured else None
+
+ if from_version and semver.VersionInfo.parse(from_version) >= semver.VersionInfo.parse(to_version):
+ # skip bootstrapping if already bootstrapped to this version or later
+ return False
- if from_version and semver.VersionInfo.parse(from_version) >= semver.VersionInfo.parse(to_version):
- # skip bootstrapping if already bootstrapped to this version or later
- return False
+ with self._vespa_deployment_lock():
+ # Initialise another session based on the latest active Vespa session. The reason we do this again while
+ # holding the distributed lock is that the Vespa application might be changed by other operations when
+ # we wait for the lock. This time, we error out if the Vespa application is not converged, which reduces
+ # the chance of running into race conditions.
+ vespa_app = self._get_vespa_application(check_configured=False, need_binary_file_support=True,
+ check_for_application_convergence=True)
# Only retrieving existing index when the vespa app is not configured and the index settings schema exists
existing_indexes = self._get_existing_indexes() if not vespa_app.is_configured and \
@@ -105,8 +116,12 @@ def bootstrap_vespa(self) -> bool:
return True
def rollback_vespa(self) -> None:
+ """
+ Roll back Vespa application package to the previous version backed up in the current app package.
+ """
with self._vespa_deployment_lock():
- self._get_vespa_application(need_binary_file_support=True).rollback(version.get_version())
+ vespa_app = self._get_vespa_application(need_binary_file_support=True)
+ vespa_app.rollback(version.get_version())
def create_index(self, marqo_index_request: MarqoIndexRequest) -> MarqoIndex:
"""
@@ -274,8 +289,8 @@ def get_marqo_version(self) -> str:
"""
return self._get_vespa_application().get_marqo_config().version
- def _get_vespa_application(self, check_configured: bool = True, need_binary_file_support: bool = False) \
- -> VespaApplicationPackage:
+ def _get_vespa_application(self, check_configured: bool = True, need_binary_file_support: bool = False,
+ check_for_application_convergence: bool = True) -> VespaApplicationPackage:
"""
Retrieve a Vespa application package. Depending on whether we need to handle binary files and the Vespa version,
it uses different implementation of VespaApplicationStore.
@@ -283,6 +298,8 @@ def _get_vespa_application(self, check_configured: bool = True, need_binary_file
Args:
check_configured: if set to True, it checks whether the application package is configured or not.
need_binary_file_support: indicates whether the support for binary file is needed.
+ check_for_application_convergence: whether we check convergence of the Vespa app package. If set to true and
+ Vespa is not converged, this process will fail with a VespaError raised.
Returns:
The VespaApplicationPackage instance we can use to do bootstrapping/rollback and any index operations.
@@ -314,13 +331,15 @@ def _get_vespa_application(self, check_configured: bool = True, need_binary_file
application_package_store = VespaApplicationFileStore(
vespa_client=self.vespa_client,
deploy_timeout=self._deployment_timeout_seconds,
- wait_for_convergence_timeout=self._convergence_timeout_seconds
+ wait_for_convergence_timeout=self._convergence_timeout_seconds,
+ check_for_application_convergence=check_for_application_convergence
)
else:
application_package_store = ApplicationPackageDeploymentSessionStore(
vespa_client=self.vespa_client,
deploy_timeout=self._deployment_timeout_seconds,
- wait_for_convergence_timeout=self._convergence_timeout_seconds
+ wait_for_convergence_timeout=self._convergence_timeout_seconds,
+ check_for_application_convergence=check_for_application_convergence
)
application = VespaApplicationPackage(application_package_store)
diff --git a/src/marqo/core/index_management/vespa_application_package.py b/src/marqo/core/index_management/vespa_application_package.py
index cd463f3bd..275b669f0 100644
--- a/src/marqo/core/index_management/vespa_application_package.py
+++ b/src/marqo/core/index_management/vespa_application_package.py
@@ -425,9 +425,11 @@ class VespaApplicationFileStore(VespaApplicationStore):
more details. This is the only viable option to deploy changes of binary files before Vespa version 8.382.22.
We implement this approach to support bootstrapping and rollback for Vespa version prior to 8.382.22.
"""
- def __init__(self, vespa_client: VespaClient, deploy_timeout: int, wait_for_convergence_timeout: int):
+ def __init__(self, vespa_client: VespaClient, deploy_timeout: int, wait_for_convergence_timeout: int,
+ check_for_application_convergence: bool = True):
super().__init__(vespa_client, deploy_timeout, wait_for_convergence_timeout)
- self._app_root_path = vespa_client.download_application(check_for_application_convergence=True)
+ self._app_root_path = vespa_client.download_application(
+ check_for_application_convergence=check_for_application_convergence)
def _full_path(self, *paths: str) -> str:
return os.path.join(self._app_root_path, *paths)
@@ -483,9 +485,11 @@ class ApplicationPackageDeploymentSessionStore(VespaApplicationStore):
See https://docs.vespa.ai/en/reference/deploy-rest-api-v2.html#create-session for more details.
However, this approach does not support binary files for Vespa version prior to 8.382.22.
"""
- def __init__(self, vespa_client: VespaClient, deploy_timeout: int, wait_for_convergence_timeout: int):
+ def __init__(self, vespa_client: VespaClient, deploy_timeout: int, wait_for_convergence_timeout: int,
+ check_for_application_convergence: bool = True):
super().__init__(vespa_client, deploy_timeout, wait_for_convergence_timeout)
- self._content_base_url, self._prepare_url = vespa_client.create_deployment_session()
+ self._content_base_url, self._prepare_url = vespa_client.create_deployment_session(
+ check_for_application_convergence)
self._all_contents = vespa_client.list_contents(self._content_base_url)
def file_exists(self, *paths: str) -> bool:
diff --git a/src/marqo/vespa/vespa_client.py b/src/marqo/vespa/vespa_client.py
index a5b04f4ab..2f4310a34 100644
--- a/src/marqo/vespa/vespa_client.py
+++ b/src/marqo/vespa/vespa_client.py
@@ -95,9 +95,12 @@ def deploy_application(self, application: str, timeout: int = 60) -> None:
self._raise_for_status(response)
- def create_deployment_session(self) -> Tuple[str, str]:
+ def create_deployment_session(self, check_for_application_convergence: bool = True) -> Tuple[str, str]:
"""
Create a Vespa deployment session.
+ Args:
+ check_for_application_convergence: check for the application to converge before creating a deployment session.
+
Returns:
Tuple[str, str]:
- content_base_url is the base url for contents in this session
@@ -107,7 +110,9 @@ def create_deployment_session(self) -> Tuple[str, str]:
via Zookeeper. Following requests should use content_base_url and prepare_url to make sure it can hit the right
config server that this session is created on.
"""
- self.check_for_application_convergence()
+ if check_for_application_convergence:
+ self.check_for_application_convergence()
+
res = self._create_deploy_session(self.http_client)
content_base_url = res['content']
prepare_url = res['prepared']
@@ -193,7 +198,8 @@ def wait_for_application_convergence(self, timeout: int = 120) -> None:
except (httpx.TimeoutException, httpcore.TimeoutException):
logger.error("Marqo timed out waiting for Vespa application to converge. Will retry.")
- raise VespaError(f"Vespa application did not converge within {timeout} seconds")
+ raise VespaError(f"Vespa application did not converge within {timeout} seconds. "
+ f"The convergence status is {self._get_convergence_status()}")
def query(self, yql: str, hits: int = 10, ranking: str = None, model_restrict: str = None,
query_features: Dict[str, Any] = None, timeout: float = None, **kwargs) -> QueryResult:
diff --git a/tests/core/index_management/test_index_management.py b/tests/core/index_management/test_index_management.py
index 1d3119689..87d550028 100644
--- a/tests/core/index_management/test_index_management.py
+++ b/tests/core/index_management/test_index_management.py
@@ -42,7 +42,7 @@ def setUp(self):
zookeeper_client=self.zookeeper_client,
enable_index_operations=True,
deployment_timeout_seconds=30,
- convergence_timeout_seconds=60)
+ convergence_timeout_seconds=120)
# this resets the application package to a clean state
self._test_dir = str(os.path.dirname(os.path.abspath(__file__)))
self._deploy_initial_app_package()
@@ -243,6 +243,36 @@ def test_rollback_should_succeed(self):
os.path.join(latest_version, *file)
)
+ @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
+ def test_bootstrap_and_rollback_should_skip_convergence_check(self, mock_check_convergence):
+ self.index_management.bootstrap_vespa()
+ mock_check_convergence.assert_not_called()
+
+ mock_check_convergence.reset_mock()
+
+ try:
+ self.index_management.rollback_vespa()
+ except ApplicationRollbackError:
+ pass
+ mock_check_convergence.assert_not_called()
+
+ @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
+ @patch('marqo.vespa.vespa_client.VespaClient.get_vespa_version')
+ def test_bootstrap_and_rollback_should_not_skip_convergence_check_for_older_vespa_version(self, mock_vespa_version,
+ mock_check_convergence):
+ mock_vespa_version.return_value = '8.382.21'
+
+ self.index_management.bootstrap_vespa()
+ mock_check_convergence.assert_called_once()
+
+ mock_check_convergence.reset_mock()
+
+ try:
+ self.index_management.rollback_vespa()
+ except ApplicationRollbackError:
+ pass
+ mock_check_convergence.assert_called_once()
+
def test_rollback_should_fail_when_target_version_is_current_version(self):
self.index_management.bootstrap_vespa()
with self.assertRaisesStrict(ApplicationRollbackError) as e:
@@ -304,45 +334,45 @@ def test_rollback_should_fail_when_admin_config_is_changed(self):
self.assertEqual("Aborting rollback. Reason: Vector store config has been changed since the last backup.",
str(e.exception))
- def test_index_operation_methods_should_raise_error_if_index_operation_is_disabled(self):
- # Create an index management instance with index operation disabled (by default)
- self.index_management = IndexManagement(self.vespa_client, zookeeper_client=None)
+ def _index_operations(self, index_management: IndexManagement):
index_request_1 = self.structured_marqo_index_request(
fields=[FieldRequest(name='title', type=FieldType.Text)],
tensor_fields=['title']
)
index_request_2 = self.unstructured_marqo_index_request()
- with self.assertRaisesStrict(InternalError):
- self.index_management.create_index(index_request_1)
-
- with self.assertRaisesStrict(InternalError):
- self.index_management.batch_create_indexes([index_request_1, index_request_2])
+ return [
+ ('create single index', lambda: index_management.create_index(index_request_1)),
+ ('batch create indexes', lambda: index_management.batch_create_indexes([index_request_1, index_request_2])),
+ ('delete single index', lambda: index_management.delete_index_by_name(index_request_1.name)),
+ ('batch delete indexes', lambda: index_management.batch_delete_indexes_by_name([index_request_1.name, index_request_2.name])),
+ ]
- with self.assertRaisesStrict(InternalError):
- self.index_management.delete_index_by_name(index_request_1.name)
+ def test_index_operation_methods_should_raise_error_if_index_operation_is_disabled(self):
+ index_management_without_zookeeper = IndexManagement(self.vespa_client, zookeeper_client=None)
- with self.assertRaisesStrict(InternalError):
- self.index_management.batch_delete_indexes_by_name([index_request_1.name, index_request_2.name])
+ for test_case, index_operation in self._index_operations(index_management_without_zookeeper):
+ with self.subTest(test_case):
+ with self.assertRaisesStrict(InternalError):
+ index_operation()
def test_index_operation_methods_should_raise_error_if_marqo_is_not_bootstrapped(self):
- index_request_1 = self.structured_marqo_index_request(
- fields=[FieldRequest(name='title', type=FieldType.Text)],
- tensor_fields=['title']
- )
- index_request_2 = self.unstructured_marqo_index_request()
-
- with self.assertRaisesStrict(ApplicationNotInitializedError):
- self.index_management.create_index(index_request_1)
-
- with self.assertRaisesStrict(ApplicationNotInitializedError):
- self.index_management.batch_create_indexes([index_request_1, index_request_2])
-
- with self.assertRaisesStrict(ApplicationNotInitializedError):
- self.index_management.delete_index_by_name(index_request_1.name)
-
- with self.assertRaisesStrict(ApplicationNotInitializedError):
- self.index_management.batch_delete_indexes_by_name([index_request_1.name, index_request_2.name])
+ for test_case, index_operation in self._index_operations(self.index_management):
+ with self.subTest(test_case):
+ with self.assertRaisesStrict(ApplicationNotInitializedError):
+ index_operation()
+
+ @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
+ def test_index_operation_methods_should_check_convergence(self, mock_check_convergence):
+ for test_case, index_operation in self._index_operations(self.index_management):
+ with self.subTest(test_case):
+ try:
+ index_operation()
+ except ApplicationNotInitializedError:
+ pass
+
+ mock_check_convergence.assert_called_once()
+ mock_check_convergence.reset_mock()
def test_create_and_delete_index_should_succeed(self):
# merge batch create and delete happy path to save some testing time
From feefb4980f9e7b2326f960b468c36d591c7f8659 Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Wed, 20 Nov 2024 13:14:41 +1100
Subject: [PATCH 4/7] Run tests using PR merge ref and run all necessary tests
on release branches (#1045)
---
.github/workflows/arm64_docker_marqo.yml | 1 +
.github/workflows/cpu_docker_marqo.yml | 1 +
.github/workflows/cpu_local_marqo.yml | 5 +++++
.github/workflows/cuda_docker_marqo.yml | 1 +
.github/workflows/largemodel_unit_test_CI.yml | 5 +++++
.github/workflows/unit_test_200gb_CI.yml | 3 +++
6 files changed, 16 insertions(+)
diff --git a/.github/workflows/arm64_docker_marqo.yml b/.github/workflows/arm64_docker_marqo.yml
index 25118af1b..83651dec9 100644
--- a/.github/workflows/arm64_docker_marqo.yml
+++ b/.github/workflows/arm64_docker_marqo.yml
@@ -29,6 +29,7 @@ on:
push:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
diff --git a/.github/workflows/cpu_docker_marqo.yml b/.github/workflows/cpu_docker_marqo.yml
index b1c7893e2..4e0d7a279 100644
--- a/.github/workflows/cpu_docker_marqo.yml
+++ b/.github/workflows/cpu_docker_marqo.yml
@@ -30,6 +30,7 @@ on:
push:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
diff --git a/.github/workflows/cpu_local_marqo.yml b/.github/workflows/cpu_local_marqo.yml
index 81543fe74..a281c2993 100644
--- a/.github/workflows/cpu_local_marqo.yml
+++ b/.github/workflows/cpu_local_marqo.yml
@@ -30,11 +30,13 @@ on:
push:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
pull_request_target:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
@@ -92,6 +94,9 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
+ # if triggered by a pull_request_target event, we should use the merge ref of the PR
+ # if triggered by a push event, github.ref points to the head of the source branch
+ ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
diff --git a/.github/workflows/cuda_docker_marqo.yml b/.github/workflows/cuda_docker_marqo.yml
index fd05e4319..32f72d5fd 100644
--- a/.github/workflows/cuda_docker_marqo.yml
+++ b/.github/workflows/cuda_docker_marqo.yml
@@ -30,6 +30,7 @@ on:
push:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
index 491b6f00c..9373f4bf3 100644
--- a/.github/workflows/largemodel_unit_test_CI.yml
+++ b/.github/workflows/largemodel_unit_test_CI.yml
@@ -7,11 +7,13 @@ on:
push:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
pull_request_target:
branches:
- mainline
+ - releases/*
paths-ignore:
- '**.md'
@@ -67,6 +69,9 @@ jobs:
with:
fetch-depth: 0
path: marqo
+ # if triggered by a pull_request_target event, we should use the merge ref of the PR
+ # if triggered by a push event, github.ref points to the head of the source branch
+ ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
index 28ee50cfc..c0d9b8b39 100644
--- a/.github/workflows/unit_test_200gb_CI.yml
+++ b/.github/workflows/unit_test_200gb_CI.yml
@@ -65,6 +65,9 @@ jobs:
with:
fetch-depth: 0
path: marqo
+ # if triggered by a pull_request_target event, we should use the merge ref of the PR
+ # if triggered by a push event, github.ref points to the head of the source branch
+ ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
From 55de6c47f68b945d0b4c8ec294c71b6183fc1b75 Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Wed, 20 Nov 2024 16:01:29 +1100
Subject: [PATCH 5/7] Fix the index management tests (#1044)
---
.../index_management/test_index_management.py | 62 ++++++++-----------
1 file changed, 27 insertions(+), 35 deletions(-)
diff --git a/tests/core/index_management/test_index_management.py b/tests/core/index_management/test_index_management.py
index 87d550028..9ab381b0b 100644
--- a/tests/core/index_management/test_index_management.py
+++ b/tests/core/index_management/test_index_management.py
@@ -100,9 +100,10 @@ def test_bootstrap_vespa_should_successfully_bootstrap_a_new_vespa_application_p
self.assertEqual(self.index_management.get_marqo_version(), version.get_version())
+ @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
@patch('marqo.vespa.vespa_client.VespaClient.get_vespa_version')
def test_bootstrap_vespa_should_skip_bootstrapping_if_already_bootstrapped_for_older_vespa_version(
- self, mock_vespa_version):
+ self, mock_vespa_version, mock_check_convergence):
mock_vespa_version.return_value = '8.382.21'
def modified_post(*args, **kwargs):
@@ -111,16 +112,31 @@ def modified_post(*args, **kwargs):
# verify the first boostrap call deploys the app to vespa
with mock.patch.object(httpx.Client, 'post', side_effect=modified_post) as mock_post:
self.assertTrue(self.index_management.bootstrap_vespa())
- self.assertEqual(mock_post.call_count, 2)
- self.assertTrue('prepareandactivate' in mock_post.call_args_list[1].args[0])
+ self.assertEqual(mock_post.call_count, 3)
+ # First call creates a session to download the app for app version check
+ self.assertTrue('session?from=' in mock_post.call_args_list[0].args[0])
+ # Second call creates a session to download the app to do bootstrapping
+ self.assertTrue('session?from=' in mock_post.call_args_list[1].args[0])
+ # Third call deploys the app by uploading the zip file
+ self.assertTrue('prepareandactivate' in mock_post.call_args_list[2].args[0])
+
+ # The first bootstrapping will deploy a new Vespa app, so it will check convergence
+ mock_check_convergence.assert_called_once()
+
+ mock_check_convergence.reset_mock()
# verify the second boostrap call skips the deployment
with mock.patch.object(httpx.Client, 'post', side_effect=modified_post) as mock_post:
self.assertFalse(self.index_management.bootstrap_vespa())
self.assertEqual(mock_post.call_count, 1)
- self.assertFalse('prepareandactivate' in mock_post.call_args_list[0].args[0])
+ # First call creates a session to download the app for app version check
+ self.assertTrue('session?from=' in mock_post.call_args_list[0].args[0])
+
+ # The second bootstrapping only needs to check the version, so it will skip the convergence check
+ mock_check_convergence.assert_not_called()
- def test_bootstrap_vespa_should_skip_bootstrapping_if_already_bootstrapped(self):
+ @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
+ def test_bootstrap_vespa_should_skip_bootstrapping_if_already_bootstrapped(self, mock_check_convergence):
def modified_put(*args, **kwargs):
return httpx.put(*args, **kwargs)
@@ -129,11 +145,17 @@ def modified_put(*args, **kwargs):
self.assertTrue(self.index_management.bootstrap_vespa())
self.assertTrue('prepare' in mock_post.call_args_list[-2].args[0])
self.assertTrue('active' in mock_post.call_args_list[-1].args[0])
+ # The first bootstrapping will deploy a new Vespa app, so it will check convergence
+ mock_check_convergence.assert_called_once()
+
+ mock_check_convergence.reset_mock()
# verify the second boostrap call skips the deployment
with mock.patch.object(httpx.Client, 'put', side_effect=modified_put) as mock_post:
self.assertFalse(self.index_management.bootstrap_vespa())
self.assertEqual(mock_post.call_count, 0)
+ # The second bootstrapping only needs to check the version, so it will skip the convergence check
+ mock_check_convergence.assert_not_called()
def test_boostrap_vespa_should_migrate_index_settings_from_existing_vespa_app(self):
"""
@@ -243,36 +265,6 @@ def test_rollback_should_succeed(self):
os.path.join(latest_version, *file)
)
- @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
- def test_bootstrap_and_rollback_should_skip_convergence_check(self, mock_check_convergence):
- self.index_management.bootstrap_vespa()
- mock_check_convergence.assert_not_called()
-
- mock_check_convergence.reset_mock()
-
- try:
- self.index_management.rollback_vespa()
- except ApplicationRollbackError:
- pass
- mock_check_convergence.assert_not_called()
-
- @patch('marqo.vespa.vespa_client.VespaClient.check_for_application_convergence')
- @patch('marqo.vespa.vespa_client.VespaClient.get_vespa_version')
- def test_bootstrap_and_rollback_should_not_skip_convergence_check_for_older_vespa_version(self, mock_vespa_version,
- mock_check_convergence):
- mock_vespa_version.return_value = '8.382.21'
-
- self.index_management.bootstrap_vespa()
- mock_check_convergence.assert_called_once()
-
- mock_check_convergence.reset_mock()
-
- try:
- self.index_management.rollback_vespa()
- except ApplicationRollbackError:
- pass
- mock_check_convergence.assert_called_once()
-
def test_rollback_should_fail_when_target_version_is_current_version(self):
self.index_management.bootstrap_vespa()
with self.assertRaisesStrict(ApplicationRollbackError) as e:
From 355d3dbb6aa0b91c7a3f6c94305e60a7f7e8de1c Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Fri, 22 Nov 2024 10:28:10 +1100
Subject: [PATCH 6/7] Change back to use pull_request trigger (#1050)
---
.github/workflows/cpu_local_marqo.yml | 7 ++-----
.github/workflows/largemodel_unit_test_CI.yml | 7 ++-----
.github/workflows/unit_test_200gb_CI.yml | 7 ++-----
3 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/.github/workflows/cpu_local_marqo.yml b/.github/workflows/cpu_local_marqo.yml
index a281c2993..66e7139c8 100644
--- a/.github/workflows/cpu_local_marqo.yml
+++ b/.github/workflows/cpu_local_marqo.yml
@@ -33,7 +33,7 @@ on:
- releases/*
paths-ignore:
- '**.md'
- pull_request_target:
+ pull_request:
branches:
- mainline
- releases/*
@@ -44,7 +44,7 @@ permissions:
contents: read
concurrency:
- group: cpu-local-api-tests-${{ github.head_ref || github.ref }}
+ group: cpu-local-api-tests-${{ github.ref }}
cancel-in-progress: true
jobs:
@@ -94,9 +94,6 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 0
- # if triggered by a pull_request_target event, we should use the merge ref of the PR
- # if triggered by a push event, github.ref points to the head of the source branch
- ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
index 9373f4bf3..643c59fb5 100644
--- a/.github/workflows/largemodel_unit_test_CI.yml
+++ b/.github/workflows/largemodel_unit_test_CI.yml
@@ -10,7 +10,7 @@ on:
- releases/*
paths-ignore:
- '**.md'
- pull_request_target:
+ pull_request:
branches:
- mainline
- releases/*
@@ -18,7 +18,7 @@ on:
- '**.md'
concurrency:
- group: large-model-unit-tests-${{ github.head_ref || github.ref }}
+ group: large-model-unit-tests-${{ github.ref }}
cancel-in-progress: true
permissions:
@@ -69,9 +69,6 @@ jobs:
with:
fetch-depth: 0
path: marqo
- # if triggered by a pull_request_target event, we should use the merge ref of the PR
- # if triggered by a push event, github.ref points to the head of the source branch
- ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
index c0d9b8b39..ae3635609 100644
--- a/.github/workflows/unit_test_200gb_CI.yml
+++ b/.github/workflows/unit_test_200gb_CI.yml
@@ -8,13 +8,13 @@ on:
branches:
- mainline
- releases/*
- pull_request_target:
+ pull_request:
branches:
- mainline
- releases/*
concurrency:
- group: unit-tests-${{ github.head_ref || github.ref }}
+ group: unit-tests-${{ github.ref }}
cancel-in-progress: true
permissions:
@@ -65,9 +65,6 @@ jobs:
with:
fetch-depth: 0
path: marqo
- # if triggered by a pull_request_target event, we should use the merge ref of the PR
- # if triggered by a push event, github.ref points to the head of the source branch
- ref: ${{ github.event_name == 'pull_request_target' && format('refs/pull/{0}/merge', github.event.pull_request.number) || github.ref }}
- name: Set up Python 3.8
uses: actions/setup-python@v3
From b5bba13700eb2e4f0a2e063a579da1572691ee3c Mon Sep 17 00:00:00 2001
From: Yihan Zhao
Date: Fri, 22 Nov 2024 12:25:26 +1100
Subject: [PATCH 7/7] Yihan/release notes 213 (#1051)
---
RELEASE.md | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/RELEASE.md b/RELEASE.md
index c37f8ad8f..246bb1715 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,19 @@
+# Release 2.13.2
+
+## Bug fixes and minor changes
+
+- Fix a bug where adding documents with numeric lists to an unstructured index results in a 500 error. Now, Marqo successfully processes the document batch, and returns a 400 error only for individual documents that contain numeric lists ([1034](https://github.com/marqo-ai/marqo/pull/1034)).
+- Fix validation of custom vector fields. Custom vector fields were silently ignored when not specified as tensor fields for an unstructured index. This will now trigger a 400 error. This helps guide users to properly define the field as a tensor field ([1034](https://github.com/marqo-ai/marqo/pull/1034)).
+- Improve the bootstrapping process to prevent Marqo from crashing during startup when the vector store takes longer to converge, especially with multiple indexes. This ensures a smoother startup process even if the vector store takes time to fully initialize ([1036](https://github.com/marqo-ai/marqo/pull/1036)).
+
+# Release 2.13.1
+
+## Bug fixes and minor changes
+
+- Fix a bug where Marqo returns a 500 error if an inaccessible private image is encountered in the query or embed endpoint. Marqo now correctly returns a 400 error with a helpful error message ([1027](https://github.com/marqo-ai/marqo/pull/1027)).
+- Fix a bug preventing Marqo from warming up Languagebind models. Marqo now successfully warms up Languagebind models as expected ([1031](https://github.com/marqo-ai/marqo/pull/1031)).
+- Fix a bug where Languagebind models always generate normalized embeddings for non-text content. These models now correctly produce unnormalized embeddings for video, audio, and image content ([1032](https://github.com/marqo-ai/marqo/pull/1032)).
+
# Release 2.13.0
## New features