Skip to content

Commit

Permalink
[4.x] Improve comb search driver performance (#9102)
Browse files: browse the repository at this point in the history
Improve comb search driver performance
  • Loading branch information
jacksleight authored Dec 1, 2023
1 parent 8a60959 commit fbbe89f
Showing 1 changed file with 44 additions and 42 deletions.
86 changes: 44 additions & 42 deletions src/Search/Comb/Comb.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -515,6 +515,9 @@ private function searchOverData($params, $raw_query)
// loop over records
foreach ($this->haystack as $key => $item) {
$data = $item['pruned'];
$data = array_map(function ($property) {
return $this->flattenArray($property);
}, $data);

// counters
$found = [
Expand All @@ -534,76 +537,75 @@ private function searchOverData($params, $raw_query)
// loop over each query chunk
foreach ($params['chunks'] as $j => $chunk) {
$escaped_chunk = preg_quote($chunk, '#');
$chunk_is_word = ! preg_match('#\s#', $chunk);
$regex = [
'whole' => '#^'.$escaped_chunk.'$#i',
'partial' => '#'.$escaped_chunk.'#i',
'partial_anywhere' => '#'.$escaped_chunk.'#i',
'partial_from_start_anywhere' => '#(^|\s)'.$escaped_chunk.'#i',
'whole_anywhere' => '#(^|\s)'.$escaped_chunk.'($|\s)#i',
'partial_from_start' => '#^'.$escaped_chunk.'#i',
'whole' => '#^'.$escaped_chunk.'$#i',
];

// loop over each data property
foreach ($data as $name => $property) {
$property = $this->flattenArray($property);

if (! is_string($property)) {
continue;
}

$words = preg_split("#\s#i", $property);
preg_match('#^[^\s]+#', $property, $first_word);
$first_word = $first_word[0] ?? '';
$strength = (! isset($this->property_weights[$name])) ? 1 : $this->property_weights[$name];

// reset iterator
$i = 0;

// whole matching
$result = preg_match_all($regex['whole'], $property, $matches);
if ($result) {
$found['whole'] += $strength * $result;
}
$matched = false;

$result = preg_match_all($regex['partial'], $property, $matches);
$result = preg_match_all($regex['partial_anywhere'], $property);
if ($result) {
$matched = true;
$found['partial_whole'] += $strength * $result;
}

$result = preg_match_all($regex['partial_from_start'], $property, $matches);
if ($result) {
$found['partial_whole_start'] += $strength * $result;
}

// word matching
foreach ($words as $word) {
$result = preg_match_all($regex['whole'], $word, $matches);
if ($result) {
$found['whole_word'] += $strength * $result;

if ($i === 0) {
$found['whole_first_word'] += $strength * $result;
if ($chunk_is_word) {
$found['partial_word'] += $strength * $result;
if (preg_match_all($regex['partial_anywhere'], $first_word)) {
$found['partial_first_word'] += $strength * $result;
}
}

$result = preg_match_all($regex['partial'], $word, $matches);
$result = preg_match_all($regex['partial_from_start_anywhere'], $property);
if ($result) {
$found['partial_word'] += $strength * $result;
$matched = true;
if ($chunk_is_word) {
$found['partial_word_start'] += $strength * $result;
if (preg_match_all($regex['partial_from_start_anywhere'], $first_word)) {
$found['partial_first_word_start'] += $strength * $result;
}
}

if ($i === 0) {
$found['partial_first_word'] += $strength * $result;
$result = preg_match_all($regex['whole_anywhere'], $property);
if ($result) {
$matched = true;
if ($chunk_is_word) {
$found['whole_word'] += $strength * $result;
if (preg_match_all($regex['whole_anywhere'], $first_word)) {
$found['whole_first_word'] += $strength * $result;
}
}
}
}

$result = preg_match_all($regex['partial_from_start'], $word, $matches);
if ($result) {
$found['partial_word_start'] += $strength * $result;
$result = preg_match_all($regex['partial_from_start'], $property);
if ($result) {
$matched = true;
$found['partial_whole_start'] += $strength * $result;

if ($i === 0) {
$found['partial_first_word_start'] += $strength * $result;
$result = preg_match_all($regex['whole'], $property);
if ($result) {
$matched = true;
$found['whole'] += $strength * $result;
}
}
}

$i++;
}

// snippet extraction (only needs to run during one chunk)
if ($j === 0) {
if ($matched && $j === 0) {
$snippets[$name] = $this->extractSnippets($property, $params['chunks']);
}
}
Expand Down

0 comments on commit fbbe89f

Please sign in to comment.