Use global namespaces instead of imports.
Showing 17 changed files with 1,805 additions and 1,813 deletions.
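All 17 files follow the same mechanical pattern: module-local `import` statements are deleted and each file's declarations are wrapped in a global `namespace LokiDB.FullTextSearch` block. TypeScript merges same-named namespace blocks across files compiled together, so exported members stay visible to one another without imports. A minimal sketch of the pattern (file names are illustrative, not from the commit):

```ts
// tokenizer_part.ts (illustrative): one block of the merged namespace.
namespace LokiDB.FullTextSearch {
  export type Tokenizer = (value: string) => string[];
}

// analyzer_part.ts (illustrative): Tokenizer resolves without an import,
// because TypeScript merges both blocks into a single namespace.
namespace LokiDB.FullTextSearch {
  export interface Analyzer {
    tokenizer: Tokenizer;
  }
}
```

Compiled without modules (for example `tsc --outFile lokidb.js ...`), all blocks emit into one script-scope object, `LokiDB.FullTextSearch`.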
The analyzer module (@@ -1,53 +1,51 @@). The three module-local imports are removed:

```ts
import {CharacterFilter} from "./character_filter";
import {Tokenizer, whitespaceTokenizer} from "./tokenizer";
import {lowercaseTokenFilter, TokenFilter} from "./token_filter";
```

The rest of the file is wrapped, re-indented but otherwise unchanged, in the global namespace:

```ts
namespace LokiDB.FullTextSearch {
  /**
   * An analyzer converts a string into tokens which are added to the inverted index for searching.
   */
  export interface Analyzer {
    /**
     * The character filters of the analyzer.
     */
    char_filter?: CharacterFilter[];
    /**
     * The tokenizer of the analyzer.
     */
    tokenizer: Tokenizer;
    /**
     * The token filters of the analyzer.
     */
    token_filter?: TokenFilter[];
  }

  /**
   * Analyzes a given string.
   * @param {Analyzer} analyzer - the analyzer
   * @param {string} str - the string
   * @returns {string[]} - the tokens
   */
  export function analyze(analyzer: Analyzer, str: string): string[] {
    if (analyzer.char_filter) {
      for (let j = 0; j < analyzer.char_filter.length; j++) {
        str = analyzer.char_filter[j](str);
      }
    }
    const tokens = analyzer.tokenizer(str);
    if (analyzer.token_filter) {
      for (let i = 0; i < tokens.length; i++) {
        for (let k = 0; k < analyzer.token_filter.length; k++) {
          tokens[i] = analyzer.token_filter[k](tokens[i], i, tokens);
        }
      }
    }
    // Remove empty tokens.
    return tokens.filter((token) => token);
  }

  /**
   * An analyzer with the whitespace tokenizer and the lowercase token filter.
   */
  export class StandardAnalyzer implements Analyzer {
    tokenizer = whitespaceTokenizer;
    token_filter = [lowercaseTokenFilter];
  }
}
```
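A short usage sketch (not part of the commit) of the pipeline above: `analyze` first runs the character filters over the raw string, then tokenizes it, then feeds every token through each token filter, and finally drops empty tokens:

```ts
// Hypothetical call site, assuming the merged namespace is in scope.
const analyzer = new LokiDB.FullTextSearch.StandardAnalyzer();
LokiDB.FullTextSearch.analyze(analyzer, "Quick Brown  Fox");
// -> ["quick", "brown", "fox"]: whitespace-split, then lowercased.
```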
The character filter module (@@ -1,6 +1,7 @@), wrapped in the same namespace:

```ts
namespace LokiDB.FullTextSearch {
  /**
   * A character filter is used to preprocess a string before it is passed to a tokenizer.
   */
  export type CharacterFilter = (value: string) => string;
}
```
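Any `(value: string) => string` function satisfies `CharacterFilter`. For illustration (hypothetical, not from the commit), a filter that strips punctuation before the tokenizer runs, passed via `char_filter: [stripPunctuation]` in an `Analyzer`:

```ts
// Hypothetical character filter: "fox." and "fox" then yield the same token.
const stripPunctuation: LokiDB.FullTextSearch.CharacterFilter =
  (value) => value.replace(/[.,;:!?]/g, "");
```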
The token filter module (@@ -1,22 +1,24 @@):

```ts
namespace LokiDB.FullTextSearch {
  /**
   * A token filter takes tokens from a tokenizer and modifies, deletes, or adds tokens.
   */
  export type TokenFilter = (value: string, index: number, array: string[]) => string;

  /**
   * Converts a token to lowercase.
   * @param {string} token - the token
   * @returns {string} - the lowercased token
   */
  export function lowercaseTokenFilter(token: string): string {
    return token.toLowerCase();
  }

  /**
   * Converts a token to uppercase.
   * @param {string} token - the token
   * @returns {string} - the uppercased token
   */
  export function uppercaseTokenFilter(token: string): string {
    return token.toUpperCase();
  }
}
```
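Since `analyze` discards falsy tokens at the end, a token filter can delete a token by returning the empty string. A hypothetical stop-word filter (not from the commit) that relies on that convention:

```ts
// Hypothetical token filter: map stop words to "", which analyze() later drops.
const STOP_WORDS = new Set(["a", "an", "the"]);
const stopWordTokenFilter: LokiDB.FullTextSearch.TokenFilter =
  (value) => (STOP_WORDS.has(value) ? "" : value);
```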
The tokenizer module (@@ -1,14 +1,15 @@):

```ts
namespace LokiDB.FullTextSearch {
  /**
   * A tokenizer splits a string into individual tokens.
   */
  export type Tokenizer = (value: string) => string[];

  /**
   * Splits a string at whitespace characters into tokens.
   * @param {string} value - the string
   * @returns {string[]} - the tokens
   */
  export function whitespaceTokenizer(value: string): string[] {
    return value.split(/[\s]+/);
  }
}
```
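Note that splitting on `/[\s]+/` produces empty strings for leading or trailing whitespace; these are exactly the empty tokens that `analyze` filters out in its last step:

```ts
LokiDB.FullTextSearch.whitespaceTokenizer("  quick  fox ");
// -> ["", "quick", "fox", ""]; the empty entries are removed later by analyze().
```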
The FullTextSearch module (@@ -1,107 +1,101 @@). All eight imports are removed; the referenced identifiers (`InvertedIndex`, `IndexSearcher`, `Dict`, `PLUGINS`, `Query`, `Scorer`, `Analyzer`, `Serialization`) are now expected to resolve through the merged global namespaces:

```ts
import {InvertedIndex} from "./inverted_index";
import {IndexSearcher} from "./index_searcher";
import {Dict} from "../../common/types";
import {PLUGINS} from "../../common/plugin";
import {Query} from "./query_types";
import {Scorer} from "./scorer";
import {Analyzer} from "./analyzer/analyzer";
import {Serialization} from "../../loki/src/serialization/migration";
```

Besides re-indentation, the only change to the body is that the document id type is referenced as the namespace-level `DocumentIndex` instead of `InvertedIndex.DocumentIndex`:

```ts
namespace LokiDB.FullTextSearch {

  export class FullTextSearch {
    /// The id field of each document.
    private _id: string;
    /// Set of ids of all indexed documents.
    private _docs: Set<DocumentIndex>;
    private _idxSearcher: IndexSearcher;
    private _invIdxs: Dict<InvertedIndex> = {};

    /**
     * Registers the full-text search as plugin.
     */
    public static register(): void {
      PLUGINS["FullTextSearch"] = FullTextSearch;
    }

    /**
     * Initialize the full-text search for the given fields.
     * @param {object[]} fieldOptions - the field options
     * @param {string} fieldOptions.field - the name of the property field
     * @param {boolean=true} fieldOptions.store - flag to indicate if the full-text search should be stored on serialization or
     *  rebuilt on deserialization
     * @param {boolean=true} fieldOptions.optimizeChanges - flag to optimize updating and deleting of documents
     *  (requires more memory but performs faster)
     * @param {Analyzer} fieldOptions.analyzer - an analyzer for the field
     * @param {string} [id] - the property name of the document index
     */
    constructor(fieldOptions: FullTextSearch.FieldOptions[] = [], id?: string) {
      // Create an inverted index for each field.
      for (let i = 0; i < fieldOptions.length; i++) {
        let fieldOption = fieldOptions[i];
        this._invIdxs[fieldOption.field] = new InvertedIndex(fieldOption);
      }
      this._id = id;
      this._docs = new Set();
      this._idxSearcher = new IndexSearcher(this._invIdxs, this._docs);
    }

    public addDocument(doc: object, id: DocumentIndex = doc[this._id]): void {
      let fieldNames = Object.keys(this._invIdxs);
      // Note: the comma operator makes the assignment the effective loop
      // condition, so iteration stops once fieldNames[i] is undefined.
      for (let i = 0, fieldName; i < fieldNames.length, fieldName = fieldNames[i]; i++) {
        if (doc[fieldName] !== undefined) {
          this._invIdxs[fieldName].insert(doc[fieldName], id);
        }
      }
      this._docs.add(id);
      this._idxSearcher.setDirty();
    }

    public removeDocument(doc: object, id: DocumentIndex = doc[this._id]): void {
      let fieldNames = Object.keys(this._invIdxs);
      for (let i = 0; i < fieldNames.length; i++) {
        this._invIdxs[fieldNames[i]].remove(id);
      }
      this._docs.delete(id);
      this._idxSearcher.setDirty();
    }

    public updateDocument(doc: object, id: DocumentIndex = doc[this._id]): void {
      this.removeDocument(doc, id);
      this.addDocument(doc, id);
    }

    public clear(): void {
      for (let id of this._docs) {
        this.removeDocument(null, id);
      }
    }

    public search(query: Query): Scorer.ScoreResults {
      return this._idxSearcher.search(query);
    }

    public toJSON(): Serialization.FullTextSearch {
      let serialized = {id: this._id, ii: {}};
      let fieldNames = Object.keys(this._invIdxs);
      for (let i = 0; i < fieldNames.length; i++) {
        const fieldName = fieldNames[i];
        serialized.ii[fieldName] = this._invIdxs[fieldName].toJSON();
      }
      return serialized;
    }

    public static fromJSONObject(serialized: Serialization.FullTextSearch, analyzers: Dict<Analyzer> = {}): FullTextSearch {
      let fts = new FullTextSearch([], serialized.id);
      let fieldNames = Object.keys(serialized.ii);
      for (let i = 0; i < fieldNames.length; i++) {
        const fieldName = fieldNames[i];
        fts._invIdxs[fieldName] = InvertedIndex.fromJSONObject(serialized.ii[fieldName], analyzers[fieldName]);
      }
      return fts;
    }
  }

  export namespace FullTextSearch {
    export interface FieldOptions extends InvertedIndex.FieldOptions {
      field: string;
    }
  }
}
```
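A usage sketch of the public surface shown above (hypothetical document shape, field names, and id property; `search` is omitted because the `Query` type from query_types.ts is not part of this excerpt):

```ts
// Hypothetical usage; note the nested name: class FullTextSearch
// inside namespace LokiDB.FullTextSearch.
LokiDB.FullTextSearch.FullTextSearch.register();  // expose through PLUGINS

const fts = new LokiDB.FullTextSearch.FullTextSearch(
  [{field: "txt"}],  // build an inverted index over the "txt" property
  "$loki"            // property that carries the document id (illustrative)
);

fts.addDocument({$loki: 1, txt: "quick brown fox"});
fts.updateDocument({$loki: 1, txt: "lazy dog"});  // remove + re-add under id 1

// Serialization round trip; analyzers can be re-supplied per field on load.
const json = fts.toJSON();
const restored = LokiDB.FullTextSearch.FullTextSearch.fromJSONObject(json);
```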
(The remaining changed files failed to load.)