Skip to content

Commit

Permalink
Async load json (#262)
Browse files Browse the repository at this point in the history
* Async loading

* import types

* #261 Dedupe and test .loadJSONAsync()

* Added a missing comment
  • Loading branch information
scambier authored May 29, 2024
1 parent ef74099 commit 4e158dd
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 20 deletions.
20 changes: 20 additions & 0 deletions src/MiniSearch.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,26 @@ e forse del mio dir poco ti cale`
})
})

describe('loadJSONAsync', () => {
  // Small fixture corpus indexed by the test below
  const documents = [
    { id: 1, title: 'Divina Commedia', text: 'Nel mezzo del cammin di nostra vita', category: 'poetry' },
    { id: 2, title: 'I Promessi Sposi', text: 'Quel ramo del lago di Como', category: 'fiction' },
    { id: 3, title: 'Vita Nova', text: 'In quella parte del libro della mia memoria', category: 'poetry' }
  ]

  it('makes a MiniSearch instance that is identical to .loadJSON()', async () => {
    const options = { fields: ['title', 'text'], storeFields: ['category'] }

    // Build a reference index and serialize it
    const original = new MiniSearch(options)
    original.addAll(documents)
    const serialized = JSON.stringify(original)

    // Deserialize the same payload through the async and the sync code paths
    const fromAsync = await MiniSearch.loadJSONAsync(serialized, options)
    const fromSync = MiniSearch.loadJSON(serialized, options)

    expect(fromSync).toEqual(fromAsync)
  })
})

describe('getDefault', () => {
it('returns the default value of the given option', () => {
expect(MiniSearch.getDefault('idField')).toEqual('id')
Expand Down
121 changes: 105 additions & 16 deletions src/MiniSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1473,6 +1473,20 @@ export default class MiniSearch<T = any> {
return this.loadJS(JSON.parse(json), options)
}

/**
 * Async equivalent of {@link MiniSearch.loadJSON}
 *
 * Note that the JSON string itself is still parsed synchronously with
 * `JSON.parse`; the parsed object is then handed to `loadJSAsync`, whose
 * Promise is returned.
 *
 * @param json  JSON-serialized index
 * @param options  configuration options, same as the constructor
 * @return A Promise that will resolve to an instance of MiniSearch deserialized from the given JSON.
 * Because this method is `async`, a missing `options` argument or a JSON
 * syntax error surfaces as a rejection of the returned Promise.
 */
static async loadJSONAsync<T = any> (json: string, options: Options<T>): Promise<MiniSearch<T>> {
  if (options == null) {
    // Name the method that was actually called, so the rejection is not
    // mistaken for a failure of the synchronous loadJSON
    throw new Error('MiniSearch: loadJSONAsync should be given the same options used when serializing the index')
  }
  return this.loadJSAsync(JSON.parse(json), options)
}

/**
* Returns the default value of an option. It will throw an error if no option
* with the given name exists.
Expand Down Expand Up @@ -1508,32 +1522,17 @@ export default class MiniSearch<T = any> {
static loadJS<T = any> (js: AsPlainObject, options: Options<T>): MiniSearch<T> {
const {
index,
documentCount,
nextId,
documentIds,
fieldIds,
fieldLength,
averageFieldLength,
storedFields,
dirtCount,
serializationVersion
} = js
if (serializationVersion !== 1 && serializationVersion !== 2) {
throw new Error('MiniSearch: cannot deserialize an index created with an incompatible version')
}

const miniSearch = new MiniSearch(options)
const miniSearch = this.instantiateMiniSearch(js, options)

miniSearch._documentCount = documentCount
miniSearch._nextId = nextId
miniSearch._documentIds = objectToNumericMap(documentIds)
miniSearch._idToShortId = new Map<any, number>()
miniSearch._fieldIds = fieldIds
miniSearch._fieldLength = objectToNumericMap(fieldLength)
miniSearch._avgFieldLength = averageFieldLength
miniSearch._storedFields = objectToNumericMap(storedFields)
miniSearch._dirtCount = dirtCount || 0
miniSearch._index = new SearchableMap()

for (const [shortId, id] of miniSearch._documentIds) {
miniSearch._idToShortId.set(id, shortId)
Expand All @@ -1559,6 +1558,80 @@ export default class MiniSearch<T = any> {
return miniSearch
}

/**
 * Rebuilds a MiniSearch instance from an already-parsed serialized index,
 * converting the plain-object maps with `objectToNumericMapAsync` and
 * awaiting `wait(0)` after every 1000 index terms.
 *
 * @ignore
 */
static async loadJSAsync<T = any> (js: AsPlainObject, options: Options<T>): Promise<MiniSearch<T>> {
  // Read everything needed from `js` up front, before the first await
  const {
    index: serializedIndex,
    documentIds: serializedDocumentIds,
    fieldLength: serializedFieldLength,
    storedFields: serializedStoredFields,
    serializationVersion: version
  } = js

  const instance = this.instantiateMiniSearch(js, options)

  instance._documentIds = await objectToNumericMapAsync(serializedDocumentIds)
  instance._fieldLength = await objectToNumericMapAsync(serializedFieldLength)
  instance._storedFields = await objectToNumericMapAsync(serializedStoredFields)

  // Fill the reverse lookup from document ID to short ID
  instance._documentIds.forEach((id, shortId) => {
    instance._idToShortId.set(id, shortId)
  })

  let processedTerms = 0
  for (const [term, data] of serializedIndex) {
    const fieldTermData = new Map() as FieldTermData

    for (const fieldId of Object.keys(data)) {
      // Version 1 used to nest the index entry inside a field called ds
      const entry = version === 1
        ? (data[fieldId].ds as unknown as SerializedIndexEntry)
        : data[fieldId]
      const termFreqs = await objectToNumericMapAsync(entry) as DocumentTermFreqs

      fieldTermData.set(parseInt(fieldId, 10), termFreqs)
    }

    // Pause once every 1000 terms, before storing the current term
    processedTerms += 1
    if (processedTerms % 1000 === 0) {
      await wait(0)
    }
    instance._index.set(term, fieldTermData)
  }

  return instance
}

/**
 * Shared first step of the sync and async loaders: checks that the
 * serialization version of `js` is 1 or 2 (throwing otherwise), creates a
 * MiniSearch instance from `options`, and sets the fields that need no
 * per-entry conversion. The ID maps and the index are left empty here.
 *
 * @ignore
 */
private static instantiateMiniSearch<T = any> (js: AsPlainObject, options: Options<T>): MiniSearch<T> {
  const version = js.serializationVersion
  const isSupportedVersion = version === 1 || version === 2
  if (!isSupportedVersion) {
    throw new Error('MiniSearch: cannot deserialize an index created with an incompatible version')
  }

  const instance = new MiniSearch(options)

  // Values copied over as they are
  instance._documentCount = js.documentCount
  instance._nextId = js.nextId
  instance._fieldIds = js.fieldIds
  instance._avgFieldLength = js.averageFieldLength
  instance._dirtCount = js.dirtCount || 0

  // Fresh, empty containers
  instance._idToShortId = new Map<any, number>()
  instance._index = new SearchableMap()

  return instance
}

/**
* @ignore
*/
Expand Down Expand Up @@ -2106,6 +2179,22 @@ const objectToNumericMap = <T>(object: { [key: string]: T }): Map<number, T> =>
return map
}

/**
 * Async variant of objectToNumericMap: copies every own enumerable key of
 * `object` into a Map, parsing each key as a base-10 integer, and awaits
 * `wait(0)` after every 1000th entry.
 */
const objectToNumericMapAsync = async <T>(object: { [key: string]: T }): Promise<Map<number, T>> => {
  const result = new Map()
  const keys = Object.keys(object)

  for (let i = 0; i < keys.length; i++) {
    const key = keys[i]
    result.set(parseInt(key, 10), object[key])

    // i is zero-based, so i + 1 is the number of entries copied so far
    if ((i + 1) % 1000 === 0) {
      await wait(0)
    }
  }

  return result
}

/**
 * Returns a Promise that resolves once a `setTimeout` of `ms` milliseconds
 * has fired.
 */
const wait = (ms: number) => new Promise((resolve) => {
  setTimeout(resolve, ms)
})

// This regular expression matches any Unicode space, newline, or punctuation
// character: `\n` and `\r` are listed explicitly, `\p{Z}` covers the Unicode
// separator categories and `\p{P}` the punctuation categories. The `u` flag
// is required for the `\p{...}` property escapes to be recognized.
const SPACE_OR_PUNCTUATION = /[\n\r\p{Z}\p{P}]/u
4 changes: 2 additions & 2 deletions src/SearchableMap/SearchableMap.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* eslint-disable no-labels */
import { TreeIterator, ENTRIES, KEYS, VALUES, LEAF } from './TreeIterator'
import fuzzySearch, { FuzzyResults } from './fuzzySearch'
import { RadixTree, Entry, Path } from './types'
import fuzzySearch, { type FuzzyResults } from './fuzzySearch'
import type { RadixTree, Entry, Path } from './types'

/**
* A class implementing the same interface as a standard JavaScript
Expand Down
2 changes: 1 addition & 1 deletion src/SearchableMap/TreeIterator.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { RadixTree, Entry, LeafType } from './types'
import type { RadixTree, Entry, LeafType } from './types'

/**
 * String constant with the value 'ENTRIES'.
 * NOTE(review): presumably selects the "entries" iteration mode of
 * TreeIterator, alongside KEYS and VALUES — confirm.
 * @ignore
 */
const ENTRIES = 'ENTRIES'
Expand Down
2 changes: 1 addition & 1 deletion src/SearchableMap/fuzzySearch.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* eslint-disable no-labels */
import { LEAF } from './TreeIterator'
import { RadixTree } from './types'
import type { RadixTree } from './types'

/**
 * A two-element tuple pairing a value of type `T` with a number.
 * NOTE(review): the number is presumably the edit distance of the fuzzy
 * match — confirm against fuzzySearch.
 */
export type FuzzyResult<T> = [T, number]

Expand Down

0 comments on commit 4e158dd

Please sign in to comment.