From 9a044eae8a1de1f948d3329831fa623658e9a7aa Mon Sep 17 00:00:00 2001 From: Luca Ongaro Date: Thu, 26 Oct 2023 16:24:21 +0200 Subject: [PATCH] Wildcard query that matches all documents (#234) * Wildcard query that matches all documents This commit introduces a special wildcard value, `MiniSearch.wildcard`, that matches all documents: ```javascript // Return search results for all documents minisearch.search(MiniSearch.wildcard) // Return search results for all documents in the 'fiction' category minisearch.search(MiniSearch.wildcard, { filter: (result) => result.category === 'fiction' }) ``` This is useful for retrieving all results while still applying search options such as `filter` and `boostDocument`. It can also be useful in query combinations, for example to query for all documents that DO NOT contain a specific term: ```javascript // Search for all documents that do NOT contain the term "maintenance" const results = ms.search({ combineWith: 'AND_NOT', queries: [ MiniSearch.wildcard, 'maintenance' ] }) ``` * Optimize wildcard search with no document boosting In this case all results have the same score of 1, so there is no need to sort them. 
--- src/MiniSearch.test.js | 38 ++++++++++++++++++++++ src/MiniSearch.ts | 71 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 6 deletions(-) diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index 823cf6fc..a2c48162 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -1203,6 +1203,32 @@ describe('MiniSearch', () => { expect(other.search('very')).toEqual(ms.search('very', { bm25: { k: 1, b: 0.7, d: 0.5 } })) }) + it('allows searching for the special value `MiniSearch.wildcard` to match all terms', () => { + const ms = new MiniSearch({ fields: ['text'], storeFields: ['cool'] }) + const documents = [ + { id: 1, text: 'something cool', cool: true }, + { id: 2, text: 'something else', cool: false }, + { id: 3, text: null, cool: true } + ] + ms.addAll(documents) + + // The string "*" is just a normal term + expect(ms.search('*')).toEqual([]) + + // The empty string is just a normal query + expect(ms.search('')).toEqual([]) + + // The value `MiniSearch.wildcard` matches all terms + expect(ms.search(MiniSearch.wildcard).map(({ id }) => id)).toEqual([1, 2, 3]) + + // Filters and document boosting are still applied + const results = ms.search(MiniSearch.wildcard, { + filter: (x) => x.cool, + boostDocument: (id) => id + }) + expect(results.map(({ id }) => id)).toEqual([3, 1]) + }) + describe('when passing a query tree', () => { it('searches according to the given combination', () => { const results = ms.search({ @@ -1223,6 +1249,18 @@ describe('MiniSearch', () => { expect(results.map(({ id }) => id)).toEqual([1, 2]) }) + it('allows combining wildcard queries', () => { + const results = ms.search({ + combineWith: 'AND_NOT', + queries: [ + MiniSearch.wildcard, + 'vita' + ] + }) + expect(results.length).toEqual(1) + expect(results.map(({ id }) => id)).toEqual([2]) + }) + it('uses the given options for each subquery, cascading them properly', () => { const results = ms.search({ combineWith: 'OR', diff --git 
a/src/MiniSearch.ts b/src/MiniSearch.ts index c67440d7..4297a454 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -326,11 +326,16 @@ export type AsPlainObject = { export type QueryCombination = SearchOptions & { queries: Query[] } +/** + * Wildcard query, used to match all documents + */ +export type Wildcard = typeof MiniSearch.wildcard + /** * Search query expression, either a query string or an expression tree * combining several queries with a combination of AND or OR. */ -export type Query = QueryCombination | string +export type Query = QueryCombination | string | Wildcard /** * Options to control vacuuming behavior. @@ -486,6 +491,8 @@ export default class MiniSearch { private _enqueuedVacuum: Promise | null private _enqueuedVacuumConditions: VacuumConditions | undefined + static readonly wildcard: unique symbol = Symbol('*') + /** * @param options Configuration options * @@ -1145,6 +1152,28 @@ export default class MiniSearch { * }) * ``` * + * ### Wildcard query + * + * Searching for an empty string (assuming the default tokenizer) returns no + * results. Sometimes though, one needs to match all documents, like in a + * "wildcard" search. This is possible by passing the special value + * `MiniSearch.wildcard` as the query: + * + * ```javascript + * // Return search results for all documents + * minisearch.search(MiniSearch.wildcard) + * ``` + * + * Note that search options such as `filter` and `boostDocument` are still + * applied, influencing which results are returned, and their order: + * + * ```javascript + * // Return search results for all documents in the 'fiction' category + * minisearch.search(MiniSearch.wildcard, { + * filter: (result) => result.category === 'fiction' + * }) + * ``` + * * ### Advanced combination of queries: * * It is possible to combine different subqueries with OR, AND, and AND_NOT, @@ -1191,14 +1220,13 @@ export default class MiniSearch { * @param options Search options. 
Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default. */ search (query: Query, searchOptions: SearchOptions = {}): SearchResult[] { - const combinedResults = this.executeQuery(query, searchOptions) - + const rawResults = this.executeQuery(query, searchOptions) const results = [] - for (const [docId, { score, terms, match }] of combinedResults) { + for (const [docId, { score, terms, match }] of rawResults) { // Final score takes into account the number of matching QUERY terms. // The end user will only receive the MATCHED terms. - const quality = terms.length + const quality = terms.length || 1 const result = { id: this._documentIds.get(docId), @@ -1213,6 +1241,14 @@ export default class MiniSearch { } } + // If it's a wildcard query, and no document boost is applied, skip sorting + // the results, as all results have the same score of 1 + if (query === MiniSearch.wildcard && + searchOptions.boostDocument == null && + this._options.searchOptions.boostDocument == null) { + return results + } + results.sort(byScore) return results } @@ -1435,6 +1471,10 @@ export default class MiniSearch { * @ignore */ private executeQuery (query: Query, searchOptions: SearchOptions = {}): RawResult { + if (query === MiniSearch.wildcard) { + return this.executeWildcardQuery(searchOptions) + } + if (typeof query !== 'string') { const options = { ...searchOptions, ...query, queries: undefined } const results = query.queries.map((subquery) => this.executeQuery(subquery, options)) @@ -1521,6 +1561,25 @@ export default class MiniSearch { return results } + /** + * @ignore + */ + private executeWildcardQuery (searchOptions: SearchOptions): RawResult { + const results = new Map() as RawResult + const options: SearchOptionsWithDefaults = { ...this._options.searchOptions, ...searchOptions } + + for (const [shortId, id] of this._documentIds) { + const score = options.boostDocument ? 
options.boostDocument(id, '', this._storedFields.get(shortId)) : 1 + results.set(shortId, { + score, + terms: [], + match: {} + }) + } + + return results + } + /** * @ignore */ @@ -1892,7 +1951,7 @@ const defaultOptions = { searchOptions: undefined, storeFields: [], logger: (level: LogLevel, message: string): void => { - if (typeof console?.[level] === "function") console[level](message); + if (typeof console?.[level] === 'function') console[level](message) }, autoVacuum: true }