From 6301f5ca35f03415808cf4c614e6471f06b63a46 Mon Sep 17 00:00:00 2001 From: JP Hwang <61258750+databyjp@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:34:34 +0000 Subject: [PATCH] Update hybrid example (#2861) --- _includes/code/howto/search.hybrid.py | 25 +++++++++++++-- .../weaviate/concepts/search/hybrid-search.md | 14 ++++++++ developers/weaviate/search/hybrid.md | 32 +++++++++++++++++-- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/_includes/code/howto/search.hybrid.py b/_includes/code/howto/search.hybrid.py index 85da3a0d0e..cb596e9ee6 100644 --- a/_includes/code/howto/search.hybrid.py +++ b/_includes/code/howto/search.hybrid.py @@ -302,10 +302,10 @@ # End test # ========================================= -# ===== Hybrid with vector similarity ===== +# ===== Hybrid with vector parameters ===== # ========================================= -# START VectorSimilarityPython +# START VectorParametersPython from weaviate.classes.query import HybridVector, Move, HybridFusion jeopardy = client.collections.get("JeopardyQuestion") @@ -321,6 +321,27 @@ alpha=0.75, limit=5, ) +# END VectorParametersPython + +assert len(response.objects) <= 5 +assert len(response.objects) > 0 + +# ========================================= +# ===== Hybrid with vector similarity threshold ===== +# ========================================= + +# START VectorSimilarityPython +from weaviate.classes.query import HybridVector, Move, HybridFusion + +jeopardy = client.collections.get("JeopardyQuestion") +response = jeopardy.query.hybrid( + query="California", + # highlight-start + max_vector_distance=0.4, # Maximum threshold for the vector search component + # highlight-end + alpha=0.75, + limit=5, +) # END VectorSimilarityPython assert len(response.objects) <= 5 diff --git a/developers/weaviate/concepts/search/hybrid-search.md b/developers/weaviate/concepts/search/hybrid-search.md index fa6165463d..4050e6bb60 100644 --- a/developers/weaviate/concepts/search/hybrid-search.md +++ b/developers/weaviate/concepts/search/hybrid-search.md @@ -164,6 +164,20 @@ The alpha value determines the weight of the vector search results in the final - `alpha > 0.5`: More weight to vector search - `alpha < 0.5`: More weight to keyword search +## Search Thresholds + +Hybrid search supports a maximum vector distance threshold through the `max vector distance` parameter. + +This threshold applies only to the vector search component of the hybrid search, allowing you to filter out results that are too dissimilar in vector space, regardless of their keyword search scores. + +For example, consider a maximum vector distance of `0.3`. This means objects with a vector distance higher than `0.3` will be excluded from the hybrid search results, even if they have high keyword search scores. + +This can be useful when you want to ensure semantic similarity meets a minimum standard while still taking advantage of keyword matching. + +There is no equivalent threshold parameter for the keyword (BM25) component of hybrid search or the final combined scores. + +This is because BM25 scores are not normalized or bounded like vector distances, making a universal threshold less meaningful. + ### Further resources - [How-to: Search](../../search/index.md) diff --git a/developers/weaviate/search/hybrid.md b/developers/weaviate/search/hybrid.md index 21dbe0267b..2f512e677a 100644 --- a/developers/weaviate/search/hybrid.md +++ b/developers/weaviate/search/hybrid.md @@ -623,14 +623,14 @@ The output is like this: :::info Added in `v1.25` ::: -You can specify [vector similarity search](/developers/weaviate/search/similarity) parameters similar to [near text](/developers/weaviate/search/similarity.md#search-with-text) or [near vector](/developers/weaviate/search/similarity.md#search-with-a-vector) searches, such as `group by` and `move to` / `move away`. An equvalent `distance` [threshold for vector search](./similarity.md#set-a-similarity-threshold) can be specified with the `max vector distance` parameter. +You can specify [vector similarity search](/developers/weaviate/search/similarity) parameters similar to [near text](/developers/weaviate/search/similarity.md#search-with-text) or [near vector](/developers/weaviate/search/similarity.md#search-with-a-vector) searches, such as `group by` and `move to` / `move away`. An equivalent `distance` [threshold for vector search](./similarity.md#set-a-similarity-threshold) can be specified with the `max vector distance` parameter. @@ -667,6 +667,32 @@ The output is like this: +## Hybrid search thresholds + +:::info Added in `v1.25` +::: + +The only available search threshold is `max vector distance`, which will set the maximum allowable distance for the vector search component. + + + + + + + + +```ts +// TS support coming soon +``` + + + + ## Group results :::info Added in `v1.25`