From ea59db7636a6120a770d1006c424eb89a68601a8 Mon Sep 17 00:00:00 2001 From: Mateusz Date: Wed, 29 May 2024 13:25:47 +0100 Subject: [PATCH 1/2] Add base64 vector search --- Couchbase/VectorQuery.php | 36 ++++++++++++++++++++--------- tests/Helpers/CouchbaseTestCase.php | 3 ++- tests/SearchTest.php | 17 ++++++++++++++ 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/Couchbase/VectorQuery.php b/Couchbase/VectorQuery.php index 574ba578..3dc1cb69 100644 --- a/Couchbase/VectorQuery.php +++ b/Couchbase/VectorQuery.php @@ -25,13 +25,15 @@ class VectorQuery { private string $vectorFieldName; - private array $vectorQuery; private int $numCandidates; + private ?array $vectorQuery = null; + private ?string $base64VectorQuery = null; private ?float $boost = null; /** * @param string $vectorFieldName the document field that contains the vector - * @param array $vectorQuery the vector query to run. Cannot be empty. + * @param array|string $vectorQuery the vector query to run. Cannot be empty. Either a vector array, + * or the vector query encoded into a base64 string. * * @since 4.1.7 * @@ -39,13 +41,19 @@ class VectorQuery * * @UNCOMMITTED: This API may change in the future. */ - public function __construct(string $vectorFieldName, array $vectorQuery) + public function __construct(string $vectorFieldName, array|string $vectorQuery) { if (empty($vectorQuery)) { throw new InvalidArgumentException("The vectorQuery cannot be empty"); } + + if (is_array($vectorQuery)) { + $this->vectorQuery = $vectorQuery; + } else { + $this->base64VectorQuery = $vectorQuery; + } + $this->vectorFieldName = $vectorFieldName; - $this->vectorQuery = $vectorQuery; $this->numCandidates = 3; } @@ -53,7 +61,8 @@ public function __construct(string $vectorFieldName, array $vectorQuery) * Static helper to keep code more readable * * @param string $vectorFieldName the document field that contains the vector - * @param array $vectorQuery the vector query to run. Cannot be empty. + * @param array|string $vectorQuery the vector query to run. Cannot be empty. Either a vector array, + * or the vector query encoded into a base64 string. * * @since 4.1.7 * @return VectorQuery @@ -62,7 +71,7 @@ public function __construct(string $vectorFieldName, array $vectorQuery) * * @UNCOMMITTED: This API may change in the future. */ - static function build(string $vectorFieldName, array $vectorQuery): VectorQuery + static function build(string $vectorFieldName, array|string $vectorQuery): VectorQuery { return new VectorQuery($vectorFieldName, $vectorQuery); } @@ -70,7 +79,7 @@ static function build(string $vectorFieldName, array $vectorQuery): VectorQuery /** * Sets the number of results that will be returned from this vector query. Defaults to 3. * - * @param int|null $numCandidates the number of results returned. + * @param int $numCandidates the number of results returned. * * @since 4.1.7 * @return VectorQuery @@ -122,11 +131,16 @@ public static function export(VectorQuery $query): array $json['boost'] = $query->boost; } - $vectorQueries = []; - foreach ($query->vectorQuery as $value) { - $vectorQueries[] = $value; + if ($query->vectorQuery != null) { + $vectorQueries = []; + foreach ($query->vectorQuery as $value) { + $vectorQueries[] = $value; + } + $json['vector'] = $vectorQueries; + } else { + $json['vector_base64'] = $query->base64VectorQuery; } - $json['vector'] = $vectorQueries; + $json['k'] = $query->numCandidates; return $json; } diff --git a/tests/Helpers/CouchbaseTestCase.php b/tests/Helpers/CouchbaseTestCase.php index 80a019c4..e4ce5411 100644 --- a/tests/Helpers/CouchbaseTestCase.php +++ b/tests/Helpers/CouchbaseTestCase.php @@ -306,7 +306,8 @@ protected function assertErrorCode($code, $ex) ); } - protected function fixCavesTimeResolutionOnWindows() { + protected function fixCavesTimeResolutionOnWindows() + { if (PHP_OS_FAMILY === 'Windows' && self::env()->useCaves()) { usleep(1); } diff --git a/tests/SearchTest.php b/tests/SearchTest.php index 60467aa3..540d526e 100644 --- a/tests/SearchTest.php +++ b/tests/SearchTest.php @@ -597,6 +597,23 @@ public function testVectorSearchEncoding() $this->assertEquals('{"match_none":"null"}', $encodedSearchQuery); } + public function testVectorSearchEmptyStringThrowsInvalidArgument() + { + $this->expectException(\Couchbase\Exception\InvalidArgumentException::class); + VectorQuery::build("vectorField", ""); + } + + public function testVectorSearchEncodingWithBase64() + { + $base64EncodedVector = base64_encode('[0.32, -0.536, 0.842]'); + $vectorQueryOne = VectorQuery::build("foo", $base64EncodedVector)->boost(0.5)->numCandidates(4); + $vectorQueryTwo = VectorQuery::build("bar", [-0.00810353, 0.6433, 0.52364]); + $searchRequest = SearchRequest::export(SearchRequest::build(VectorSearch::build([$vectorQueryOne, $vectorQueryTwo]))); + $encodedVectorQuery = json_encode($searchRequest['vectorSearch']); + $this->assertEquals(JSON_ERROR_NONE, json_last_error()); + $this->assertEquals("[{\"field\":\"foo\",\"boost\":0.5,\"vector_base64\":\"{$base64EncodedVector}\",\"k\":4},{\"field\":\"bar\",\"vector\":[-0.00810353,0.6433,0.52364],\"k\":3}]", $encodedVectorQuery); + } + public function testScopeSearch() { $this->skipIfCaves(); From 609d3bf668c125b460d9740bc601d619b112c8af Mon Sep 17 00:00:00 2001 From: Mateusz Date: Mon, 3 Jun 2024 17:54:02 +0100 Subject: [PATCH 2/2] fix test --- tests/SearchTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/SearchTest.php b/tests/SearchTest.php index 540d526e..cc4ddf5f 100644 --- a/tests/SearchTest.php +++ b/tests/SearchTest.php @@ -605,13 +605,13 @@ public function testVectorSearchEmptyStringThrowsInvalidArgument() public function testVectorSearchEncodingWithBase64() { - $base64EncodedVector = base64_encode('[0.32, -0.536, 0.842]'); + $base64EncodedVector = "aOeYBEXJ4kI="; $vectorQueryOne = VectorQuery::build("foo", $base64EncodedVector)->boost(0.5)->numCandidates(4); $vectorQueryTwo = VectorQuery::build("bar", [-0.00810353, 0.6433, 0.52364]); $searchRequest = SearchRequest::export(SearchRequest::build(VectorSearch::build([$vectorQueryOne, $vectorQueryTwo]))); $encodedVectorQuery = json_encode($searchRequest['vectorSearch']); $this->assertEquals(JSON_ERROR_NONE, json_last_error()); - $this->assertEquals("[{\"field\":\"foo\",\"boost\":0.5,\"vector_base64\":\"{$base64EncodedVector}\",\"k\":4},{\"field\":\"bar\",\"vector\":[-0.00810353,0.6433,0.52364],\"k\":3}]", $encodedVectorQuery); + $this->assertEquals(sprintf('[{"field":"foo","boost":0.5,"vector_base64":"%s","k":4},{"field":"bar","vector":[-0.00810353,0.6433,0.52364],"k":3}]', $base64EncodedVector), $encodedVectorQuery); } public function testScopeSearch()