Skip to content

Commit

Permalink
PCBC-987: Fix consistency vector encoding for FTS
Browse files Browse the repository at this point in the history
  • Loading branch information
avsej committed Apr 22, 2024
1 parent 19703d7 commit ba770f5
Show file tree
Hide file tree
Showing 7 changed files with 141 additions and 39 deletions.
4 changes: 2 additions & 2 deletions Couchbase/MutationState.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ public function export(): array
foreach ($this->tokens as $token) {
$state[] = [
"partitionId" => $token->partitionId(),
"partitionUuid" => hexdec($token->partitionUuid()),
"sequenceNumber" => hexdec($token->sequenceNumber()),
"partitionUuid" => $token->partitionUuid(),
"sequenceNumber" => $token->sequenceNumber(),
"bucketName" => $token->bucketName(),
];
}
Expand Down
15 changes: 3 additions & 12 deletions Couchbase/SearchOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class SearchOptions implements JsonSerializable
private ?int $skip = null;
private ?bool $explain = null;
private ?bool $disableScoring = null;
private ?array $consistentWith = null;
private ?MutationState $consistentWith = null;
private ?array $fields = null;
private ?array $facets = null;
private ?array $sort = null;
Expand Down Expand Up @@ -132,16 +132,7 @@ public function disableScoring(bool $disabled): SearchOptions
*/
public function consistentWith(string $index, MutationState $state): SearchOptions
{
$vectors = [];
foreach ($state->tokens() as $token) {
$vectors[] = [
'partitionId' => $token->partitionId(),
'partitionUuid' => $token->partitionUuid(),
'sequenceNumber' => $token->sequenceNumber(),
'bucketName' => $token->bucketName(),
];
}
$this->consistentWith = $vectors;
$this->consistentWith = $state;
return $this;
}

Expand Down Expand Up @@ -322,7 +313,7 @@ public static function export(?SearchOptions $options): array
'disableScoring' => $options->disableScoring,
'fields' => $options->fields,
'sortSpecs' => $sort,
'consistentWith' => $options->consistentWith,
'consistentWith' => $options->consistentWith == null ? null : $options->consistentWith->export(),
'facets' => $options->facets,
'highlightStyle' => $highlightStyle,
'highlightFields' => $highlightFields,
Expand Down
8 changes: 4 additions & 4 deletions src/wrapper/conversion_utilities.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -732,16 +732,16 @@ zval_to_common_search_request(const zend_string* index_name, const zend_string*
std::uint64_t sequence_number;
std::uint16_t partition_id;
std::string bucket_name;
if (auto e = cb_assign_integer(partition_id, options, "partitionId"); e.ec) {
if (auto e = cb_assign_integer(partition_id, item, "partitionId"); e.ec) {
return { {}, e };
}
if (auto e = cb_assign_integer(partition_uuid, options, "partitionUuid"); e.ec) {
if (auto e = cb_assign_integer(partition_uuid, item, "partitionUuid"); e.ec) {
return { {}, e };
}
if (auto e = cb_assign_integer(sequence_number, options, "sequenceNumber"); e.ec) {
if (auto e = cb_assign_integer(sequence_number, item, "sequenceNumber"); e.ec) {
return { {}, e };
}
if (auto e = cb_assign_string(bucket_name, options, "bucketName"); e.ec) {
if (auto e = cb_assign_string(bucket_name, item, "bucketName"); e.ec) {
return { {}, e };
}
vectors.emplace_back(mutation_token{ partition_uuid, sequence_number, partition_id, bucket_name });
Expand Down
41 changes: 41 additions & 0 deletions src/wrapper/conversion_utilities.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <chrono>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>

#include <fmt/format.h>

namespace couchbase::transactions
{
Expand Down Expand Up @@ -67,6 +68,44 @@ query_response_to_zval(zval* return_value, const core::operations::query_respons
void
search_query_response_to_zval(zval* return_value, const core::operations::search_response& resp);

/**
 * Parses the leading integer of @p str into the requested integral type.
 *
 * The text is parsed with std::stoll (signed targets) or std::stoull (unsigned targets) and the
 * result is range-checked against @p Integer before narrowing, so a value that does not fit is
 * reported instead of being silently truncated.
 *
 * @tparam Integer integral type to produce
 * @param str text to parse
 * @param pos if not null, receives the number of characters consumed
 * @param base numeric base forwarded to std::stoll/std::stoull
 * @return the parsed value
 * @throws std::invalid_argument if no conversion could be performed
 * @throws std::out_of_range if the value does not fit into @p Integer, or if a negative value is
 *         supplied for an unsigned @p Integer
 */
template<typename Integer>
static Integer
parse_integer(const std::string& str, std::size_t* pos = nullptr, int base = 10)
{
    if constexpr (std::is_signed_v<Integer>) {
        const long long wide = std::stoll(str, pos, base);
        if (wide < static_cast<long long>(std::numeric_limits<Integer>::min()) ||
            wide > static_cast<long long>(std::numeric_limits<Integer>::max())) {
            throw std::out_of_range("parse_integer: value does not fit into the requested signed type");
        }
        return static_cast<Integer>(wide);
    } else {
        // std::stoull accepts a leading minus sign and wraps the result around, which would turn
        // "-1" into the maximum value. Reject negative input for unsigned targets explicitly.
        const auto first = str.find_first_not_of(" \t\n\v\f\r");
        if (first != std::string::npos && str[first] == '-') {
            throw std::out_of_range("parse_integer: negative value for an unsigned type");
        }
        const unsigned long long wide = std::stoull(str, pos, base);
        if (wide > static_cast<unsigned long long>(std::numeric_limits<Integer>::max())) {
            throw std::out_of_range("parse_integer: value does not fit into the requested unsigned type");
        }
        return static_cast<Integer>(wide);
    }
}

/**
 * Converts a hexadecimal string stored in a zend_string into an integer of type @p Integer.
 *
 * The whole string must be consumed by the parser and the value must fit into @p Integer;
 * otherwise an invalid_argument error is returned and the optional is left empty.
 *
 * @tparam Integer integral type to produce
 * @param value string holding the hexadecimal number
 * @param name option name, used only to build error messages
 * @return pair of error info and parsed value; on success the error info is default-constructed
 */
template<typename Integer>
static std::pair<core_error_info, std::optional<Integer>>
cb_get_integer_from_hex(const zend_string* value, std::string_view name)
{
    // Parse into the widest type of matching signedness, so that the range check below sees the
    // real value instead of one that has already been truncated to Integer.
    using wide_type = std::conditional_t<std::is_signed_v<Integer>, long long, unsigned long long>;

    const auto hex_string = cb_string_new(value);

    if (hex_string.empty()) {
        return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("unexpected empty string for {}", name) }, {} };
    }

    try {
        std::size_t pos{ 0 };
        const wide_type parsed = parse_integer<wide_type>(hex_string, &pos, 16);
        constexpr auto lowest = static_cast<wide_type>(std::numeric_limits<Integer>::min());
        constexpr auto highest = static_cast<wide_type>(std::numeric_limits<Integer>::max());
        if (parsed < lowest || parsed > highest) {
            return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("number out of range for {}", name) }, {} };
        }
        // Anything left after the number (e.g. "12xyz") makes the input invalid.
        if (pos != hex_string.length()) {
            return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("trailing garbage in {}", name) }, {} };
        }
        return { {}, static_cast<Integer>(parsed) };
    } catch (const std::invalid_argument&) {
        return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("invalid hex number for {}", name) }, {} };
    } catch (const std::out_of_range&) {
        return { { errc::common::invalid_argument, ERROR_LOCATION, fmt::format("number out of range for {}", name) }, {} };
    }
}

template<typename Integer>
static std::pair<core_error_info, std::optional<Integer>>
cb_get_integer(const zval* options, std::string_view name)
Expand All @@ -87,6 +126,8 @@ cb_get_integer(const zval* options, std::string_view name)
return {};
case IS_LONG:
break;
case IS_STRING:
return cb_get_integer_from_hex<Integer>(Z_STR_P(value), name);
default:
return {
{ errc::common::invalid_argument, ERROR_LOCATION, fmt::format("expected {} to be a integer value in the options", name) },
Expand Down
1 change: 1 addition & 0 deletions tests/KeyValueScanTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public function setUp(): void
{
parent::setUp();
$this->skipIfProtostellar();
$this->skipIfUnsupported($this->version()->supportsCollections());

$this->collection = $this->defaultCollection();
for ($i = 0; $i < 100; $i++) {
Expand Down
70 changes: 49 additions & 21 deletions tests/SearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@

use Couchbase\BooleanSearchQuery;
use Couchbase\ClusterInterface;
use Couchbase\CollectionInterface;
use Couchbase\ConjunctionSearchQuery;
use Couchbase\DateRangeSearchFacet;
use Couchbase\DateRangeSearchQuery;
use Couchbase\DisjunctionSearchQuery;
use Couchbase\DocIdSearchQuery;
use Couchbase\DurabilityLevel;
use Couchbase\Exception\FeatureNotAvailableException;
use Couchbase\Exception\IndexNotFoundException;
use Couchbase\GeoBoundingBoxSearchQuery;
Expand Down Expand Up @@ -52,6 +54,7 @@
use Couchbase\TermRangeSearchQuery;
use Couchbase\TermSearchFacet;
use Couchbase\TermSearchQuery;
use Couchbase\UpsertOptions;
use Couchbase\VectorQuery;
use Couchbase\VectorQueryCombination;
use Couchbase\VectorSearch;
Expand All @@ -63,34 +66,59 @@
class SearchTest extends Helpers\CouchbaseTestCase
{
private ClusterInterface $cluster;
private CollectionInterface $collection;
private SearchIndexManager $indexManager;

/**
 * Loads the beer dataset fixture into the test collection.
 *
 * Every entry of beer-data.json (located next to this test file) is upserted under its key
 * using MAJORITY_AND_PERSIST_TO_ACTIVE durability.
 *
 * @return int number of the documents in dataset
 * @throws \RuntimeException if the fixture cannot be read or does not decode to an array
 * @throws \JsonException if the fixture is not valid JSON
 */
public function loadDataset(): int
{
    $path = __DIR__ . "/beer-data.json";
    $contents = file_get_contents($path);
    if ($contents === false) {
        throw new \RuntimeException(sprintf("unable to read dataset file '%s'", $path));
    }

    // Fail with a clear message instead of iterating over null when the fixture is broken.
    $dataset = json_decode($contents, true, 512, JSON_THROW_ON_ERROR);
    if (!is_array($dataset)) {
        throw new \RuntimeException(sprintf("dataset file '%s' does not contain a JSON object", $path));
    }

    $options = UpsertOptions::build()->durabilityLevel(DurabilityLevel::MAJORITY_AND_PERSIST_TO_ACTIVE);
    foreach ($dataset as $id => $document) {
        $this->collection->upsert($id, $document, $options);
    }

    return count($dataset);
}

/**
 * Creates (or updates) the 'beer-search' index and blocks until it has indexed the dataset.
 *
 * The index parameters are taken from beer-search.json located next to this test file. After
 * the upsert, the indexed document count is polled every 5 seconds until it reaches
 * $datasetSize. Progress is reported on STDERR.
 *
 * @param int $datasetSize number of documents the index is expected to contain
 */
public function createSearchIndex(int $datasetSize): void
{
    fprintf(STDERR, "Create 'beer-search' to index %d docs\n", $datasetSize);
    $indexDump = json_decode(file_get_contents(__DIR__ . "/beer-search.json"), true);
    $index = SearchIndex::build("beer-search", self::env()->bucketName());
    $index->setParams($indexDump["params"]);
    $this->indexManager->upsertIndex($index);

    $start = time();
    while (true) {
        try {
            $indexedDocuments = $this->indexManager->getIndexedDocumentsCount("beer-search");
            fprintf(STDERR, "%ds, Indexing 'beer-search': %d docs\n", time() - $start, $indexedDocuments);
            if ($indexedDocuments >= $datasetSize) {
                break;
            }
        } catch (\Couchbase\Exception\IndexNotReadyException $ex) {
            // The index cannot report its document count yet; wait and retry below.
        }
        // Sleep on both paths, so that a not-ready index does not cause a tight retry loop.
        sleep(5);
    }
}

public function setUp(): void
{
parent::setUp();

$this->cluster = $this->connectCluster();
$this->collection = $this->openBucket(self::env()->bucketName())->defaultCollection();

if (self::env()->useCouchbase()) {
$this->indexManager = $this->cluster->searchIndexes();
try {
$this->indexManager->getIndex("beer-search");
} catch (IndexNotFoundException $ex) {
$indexDump = json_decode(file_get_contents(__DIR__ . "/beer-search.json"), true);
$index = SearchIndex::build("beer-search", "beer-sample");
$index->setParams($indexDump["params"]);
$this->indexManager->upsertIndex($index);
}
while (true) {
try {
$indexedDocuments = $this->indexManager->getIndexedDocumentsCount("beer-search");
fprintf(STDERR, "Indexing 'beer-search': %d docs\n", $indexedDocuments);
if ($indexedDocuments > 7000) {
break;
}
sleep(3);
} catch (\Couchbase\Exception\IndexNotReadyException $ex) {
}
$this->createSearchIndex($this->loadDataset());
}
}
}
Expand Down Expand Up @@ -159,6 +187,7 @@ public function testSearchWithNoHits()
$this->assertEquals(0, $result->metaData()->totalHits());
}


public function testSearchWithConsistency()
{
$this->skipIfCaves();
Expand All @@ -173,8 +202,7 @@ public function testSearchWithConsistency()
$this->assertEmpty($result->rows());
$this->assertEquals(0, $result->metaData()->totalHits());

$collection = $this->cluster->bucket('beer-sample')->defaultCollection();
$result = $collection->upsert($id, ["type" => "beer", "name" => $id]);
$result = $this->collection->upsert($id, ["type" => "beer", "name" => $id]);
$mutationState = new MutationState();
$mutationState->add($result);

Expand Down Expand Up @@ -358,7 +386,7 @@ public function testCompoundSearchQueries()
$disjunctionQuery = new DisjunctionSearchQuery([$nameQuery, $descriptionQuery]);
$options = SearchOptions::build()->fields(["type", "name", "description"]);
$result = $this->cluster->searchQuery("beer-search", $disjunctionQuery, $options);
$this->assertGreaterThan(1000, $result->metaData()->totalHits());
$this->assertGreaterThan(20, $result->metaData()->totalHits());
$this->assertNotEmpty($result->rows());
$this->assertMatchesRegularExpression('/green/i', $result->rows()[0]['fields']['name']);
$this->assertDoesNotMatchRegularExpression('/hop/i', $result->rows()[0]['fields']['name']);
Expand Down Expand Up @@ -434,18 +462,18 @@ public function testSearchWithFacets()
$this->assertNotNull($result->facets()['foo']);
$this->assertEquals('name', $result->facets()['foo']->field());
$this->assertEquals('ale', $result->facets()['foo']->terms()[0]->term());
$this->assertGreaterThan(1000, $result->facets()['foo']->terms()[0]->count());
$this->assertGreaterThan(10, $result->facets()['foo']->terms()[0]->count());

$this->assertNotNull($result->facets()['bar']);
$this->assertEquals('updated', $result->facets()['bar']->field());
$this->assertEquals('old', $result->facets()['bar']->dateRanges()[0]->name());
$this->assertGreaterThan(5000, $result->facets()['bar']->dateRanges()[0]->count());
$this->assertGreaterThan(30, $result->facets()['bar']->dateRanges()[0]->count());

$this->assertNotNull($result->facets()['baz']);
$this->assertEquals('abv', $result->facets()['baz']->field());
$this->assertEquals('light', $result->facets()['baz']->numericRanges()[0]->name());
$this->assertGreaterThan(0, $result->facets()['baz']->numericRanges()[0]->max());
$this->assertGreaterThan(100, $result->facets()['baz']->numericRanges()[0]->count());
$this->assertGreaterThan(15, $result->facets()['baz']->numericRanges()[0]->count());
}

public function testNullInNumericRangeFacet()
Expand Down
Loading

0 comments on commit ba770f5

Please sign in to comment.