Skip to content

Commit

Permalink
Add search option to apply term boosting (#274)
Browse files Browse the repository at this point in the history
Term boosting (giving greater or lower importance to specific query
terms) was previously not supported. It was technically possible by
using the `boostDocument` search option (as shown in #268), but doing so was cumbersome and error-prone.

This commit adds a new search option, `boostTerm`, which makes it a lot
easier to apply term boosting. The option is a function that is invoked
with each search term, and is expected to return a numeric boost factor.
  • Loading branch information
lucaong authored Jul 22, 2024
1 parent 5abda9e commit ec9c0ff
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

`MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html).

## unreleased

- Add `boostTerm` search option to apply a custom boosting factor to specific
terms in the query

## v7.0.2

- [fix] Fix regression on tokenizer producing blank terms when multiple
Expand Down
16 changes: 16 additions & 0 deletions src/MiniSearch.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,22 @@ describe('MiniSearch', () => {
expect(results[0].score).toBeCloseTo(resultsWithoutBoost[0].score * boostFactor)
})

it('boosts terms by calling boostTerm with normalized query term, term index in the query, and array of all query terms', () => {
  // Boost factor for each query term, keyed by the normalized form of the
  // term that the callback is expected to receive.
  const boostFactors = { commedia: 1.5, nova: 1.1 }
  const boostTerm = jest.fn((term, i, terms) => boostFactors[term])
  const query = 'Commedia nova'

  // Run the same query twice: first as a baseline, then with term boosting.
  const unboosted = ms.search(query)
  const boosted = ms.search(query, { boostTerm })

  // The callback must have been invoked once per term with
  // (term, index of the term in the query, all query terms).
  const normalizedTerms = ['commedia', 'nova']
  normalizedTerms.forEach((term, position) => {
    expect(boostTerm).toHaveBeenCalledWith(term, position, normalizedTerms)
  })

  // The test expects the first result to be scaled by the 'commedia' factor
  // and the second result by the 'nova' factor, relative to the baseline.
  const expectedFactors = [boostFactors.commedia, boostFactors.nova]
  expectedFactors.forEach((factor, rank) => {
    expect(boosted[rank].score).toBeCloseTo(unboosted[rank].score * factor)
  })
})

it('skips document if boostDocument returns a falsy value', () => {
const query = 'vita'
const boostDocument = jest.fn((id, term) => id === 3 ? null : 1)
Expand Down
30 changes: 24 additions & 6 deletions src/MiniSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ export type SearchOptions = {
*/
boost?: { [fieldName: string]: number },

/**
* Function to calculate a boost factor for each term.
*
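* This function, if provided, is called for each query term (as split by
* `tokenize` and processed by `processTerm`). The arguments passed to the
* function are the query term, the positional index of the term in the query,
* and the array of all query terms. It is expected to return a numeric boost
* factor for the term. A factor lower than 1 reduces the importance of the
* term, a factor greater than 1 increases it. A factor of exactly 1 is
* neutral, and does not affect the term's importance.
*
* The factor multiplies the score of every match derived from the term,
* including prefix and fuzzy matches. The returned value is used as-is as a
* score multiplier, so the function should return a number for every term
* (return 1 for terms that need no boost).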
*/
boostTerm?: (term: string, i: number, terms: string[]) => number,

/**
* Relative weights to assign to prefix search results and fuzzy search
* results. Exact matches are assigned a weight of 1.
Expand Down Expand Up @@ -480,7 +493,8 @@ export type AutoVacuumOptions = VacuumOptions & VacuumConditions
type QuerySpec = {
prefix: boolean,
fuzzy: number | boolean,
term: string
term: string,
termBoost: number
}

type DocumentTermFreqs = Map<number, number>
Expand Down Expand Up @@ -1685,7 +1699,7 @@ export default class MiniSearch<T = any> {
const { fuzzy: fuzzyWeight, prefix: prefixWeight } = { ...defaultSearchOptions.weights, ...weights }

const data = this._index.get(query.term)
const results = this.termResults(query.term, query.term, 1, data, boosts, boostDocument, bm25params)
const results = this.termResults(query.term, query.term, 1, query.termBoost, data, boosts, boostDocument, bm25params)

let prefixMatches
let fuzzyMatches
Expand Down Expand Up @@ -1715,7 +1729,7 @@ export default class MiniSearch<T = any> {
// account for the fact that prefix matches stay more relevant than
// fuzzy matches for longer distances.
const weight = prefixWeight * term.length / (term.length + 0.3 * distance)
this.termResults(query.term, term, weight, data, boosts, boostDocument, bm25params, results)
this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results)
}
}

Expand All @@ -1727,7 +1741,7 @@ export default class MiniSearch<T = any> {
// Weight gradually approaches 0 as distance goes to infinity, with the
// weight for the hypothetical distance 0 being equal to fuzzyWeight.
const weight = fuzzyWeight * term.length / (term.length + distance)
this.termResults(query.term, term, weight, data, boosts, boostDocument, bm25params, results)
this.termResults(query.term, term, weight, query.termBoost, data, boosts, boostDocument, bm25params, results)
}
}

Expand Down Expand Up @@ -1826,6 +1840,7 @@ export default class MiniSearch<T = any> {
sourceTerm: string,
derivedTerm: string,
termWeight: number,
termBoost: number,
fieldTermData: FieldTermData | undefined,
fieldBoosts: { [field: string]: number },
boostDocumentFn: ((id: any, term: string, storedFields?: Record<string, unknown>) => number) | undefined,
Expand Down Expand Up @@ -1864,7 +1879,7 @@ export default class MiniSearch<T = any> {
// present. This is currently not supported, and may require further
// analysis to see if it is a valid use case.
const rawScore = calcBM25Score(termFreq, matchingFields, this._documentCount, fieldLength, avgFieldLength, bm25params)
const weightedScore = termWeight * fieldBoost * docBoost * rawScore
const weightedScore = termWeight * termBoost * fieldBoost * docBoost * rawScore

const result = results.get(docId)
if (result) {
Expand Down Expand Up @@ -2118,7 +2133,10 @@ const termToQuerySpec = (options: SearchOptions) => (term: string, i: number, te
const prefix = (typeof options.prefix === 'function')
? options.prefix(term, i, terms)
: (options.prefix === true)
return { term, fuzzy, prefix }
const termBoost = (typeof options.boostTerm === 'function')
? options.boostTerm(term, i, terms)
: 1
return { term, fuzzy, prefix, termBoost }
}

const defaultOptions = {
Expand Down

0 comments on commit ec9c0ff

Please sign in to comment.