From ad59121436c0ee84bb7685ece3e984be09829e54 Mon Sep 17 00:00:00 2001 From: andreakreichgauer <38720222+andreakreichgauer@users.noreply.github.com> Date: Thu, 7 Apr 2022 13:18:30 +0200 Subject: [PATCH] fix: endless search for multiple-term queries This PR fixes "endless" searches or time-outs due to long multiple-term queries. This closes #38 . --- README.md | 42 +++++++++-- .../{icd10Controller.ts => icd10.ts} | 21 +++++- src/express.ts | 10 +++ src/logger.ts | 1 - ...CD10gmCodesystem.ts => icd10CodeSystem.ts} | 13 +--- src/routers/icd10.ts | 21 ++++++ src/routes/icd10.ts | 18 ----- src/server.ts | 73 +++++++++---------- src/services/codeFilter.ts | 1 + src/services/filter.ts | 4 +- src/services/fuseSearch.ts | 5 +- src/services/textFilter.ts | 1 + src/utils/HTTPError.ts | 9 +++ 13 files changed, 139 insertions(+), 80 deletions(-) rename src/controller/{icd10Controller.ts => icd10.ts} (73%) create mode 100644 src/express.ts rename src/model/{ICD10gmCodesystem.ts => icd10CodeSystem.ts} (90%) create mode 100644 src/routers/icd10.ts delete mode 100644 src/routes/icd10.ts create mode 100644 src/utils/HTTPError.ts diff --git a/README.md b/README.md index e1690ef..5c22366 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ -# ICD 10 Rest API (German Version) +# ICD-10 API (German Version) API to search the german version of the 10th revision of the International Statistical Classification of Diseases Database. [![GitHub release (latest by date)](https://img.shields.io/github/v/release/dot-base/icd-10-api)](https://github.com/dot-base/icd-10-api/releases) ## Quick Nav -1. [Production Deployment](#Production-Deployment) +1. [Usage](#Usage) 1. [Contributing](#Contributing) -## Production Deployment -Want an ICD 10 api of your own? The easiest way is to deploy our docker container. Just follow the steps below to get started. +## Usage +Want an ICD-10 api of your own? The easiest way is to deploy our docker container. Just follow the steps below to get started. 
[![Docker Build Status](https://img.shields.io/badge/We%20love-Docker-blue?style=flat&logo=Docker)](https://github.com/orgs/dot-base/packages) @@ -19,12 +19,44 @@ Want an ICD 10 api of your own? The easiest way is to deploy our docker containe ### Deployment +1. Set environment variables to configure the container: + ```sh + export MAX_SEARCH_WORDS="6" + ``` 1. Start the container with a single command ``` docker run --name icd-10-api -p 3000:3000 -d ghcr.io/dot-base/icd-10-api:latest ``` -1. Done and dusted 🎉. The ICD 10 rest api is available on port 3000. +1. Done and dusted 🎉. The ICD-10 api is available on port 3000. + +## Configuration + +### Environment Variables +| Variable Name | Default | Example | +| --- | --- | --- | +| MAX_SEARCH_WORDS | 6 | - | + +## Considerations + +### Pre-processing and multi-term searches +The ICD-10 api processes a search query by first splitting it into separate search terms as in the following example: + +``` +'Parkinson-Syndrom Primär' -> ['Parkinson', 'Syndrom', 'Primär'] +'Parkinson G20.9 unspezifisch' -> ['Parkinson', 'G20.9', 'unspezifisch'] +``` + +If a query consists of several terms, the ICD-10 api will assemble all combinations of these terms and order them by length: + +``` +'Parkinson-Syndrom Primär' -> ['Parkinson Syndrom Primär', 'Parkinson Syndrom', 'Parkinson Primär', 'Syndrom Primär', 'Parkinson', 'Syndrom', 'Primär'] +``` + +The service will search for matches in descending order, meaning it will first search for the full term '*Parkinson Syndrom Primär*'. If no match is found, the search will proceed with '*Parkinson AND Syndrom*', '*Parkinson AND Primär*' and '*Syndrom AND Primär*'. If the combination of two search terms results in one or several matches, the search will stop and return the result. Otherwise, it will proceed to search for each single term separately. +Due to performance and time-out reasons the default max. 
value for search terms is set to 6, but can be changed individually by setting `MAX_SEARCH_WORDS`. +### Prioritization of ICD-10 codes +Terms that match the ICD code pattern are handled with priority. If a query contains something like '*Parkinson G20*' or '*Parkinson G20.9*', the service will first try to find exact matches for these ICD codes. It will only search for further results matching 'Parkinson' if no matching ICD codes were found. ## Contributing diff --git a/src/controller/icd10Controller.ts b/src/controller/icd10.ts similarity index 73% rename from src/controller/icd10Controller.ts rename to src/controller/icd10.ts index 1833570..b0f44a3 100644 --- a/src/controller/icd10Controller.ts +++ b/src/controller/icd10.ts @@ -2,30 +2,43 @@ import Fuse from "fuse.js"; import { ICodeSystem_Concept } from "@ahryman40k/ts-fhir-types/lib/R4"; import CodeFilter from "@/services/codeFilter"; import TextFilter from "@/services/textFilter"; +import HTTPError from "@/utils/HTTPError"; export class ICD10Controller { - private static icdRegex = new RegExp("[A-TV-Z][0-9][0-9].?[0-9A-TV-Z]{0,4}", "i"); + private static icd10Regex = new RegExp("[A-TV-Z][0-9][0-9].?[0-9A-TV-Z]{0,4}", "i"); private static stripRegex = new RegExp("[ -]+"); public static getFiltered(searchstring: string): Fuse.FuseResult[] { const searchTerms: string[] = ICD10Controller.splitTerms(searchstring); const icd10Codes: string[] = ICD10Controller.filterCodes(searchTerms); + /** + * If a query contains icd10 codes (e.g. G20.9), + * only codes are considered and remaining search terms are ignored + */ if (icd10Codes.length > 0) { const codeResponse = CodeFilter.initSearch(icd10Codes); if (codeResponse.length > 0) return codeResponse; } + if (searchTerms.length > Number(process.env.MAX_SEARCH_WORDS)) + throw new HTTPError( + `Search query exceeded max. 
amount of ${process.env.MAX_SEARCH_WORDS} allowed terms.`, + 400 + ); + const searchResult = TextFilter.initSearch(searchTerms); - // copy the results before removing extensions, otherwise - // we would change the actual database we are searching on + /** + * copy the results before removing extensions, otherwise + * we would change the actual dataset we are searching on + */ const searchResultCopy = JSON.parse(JSON.stringify(searchResult)); return ICD10Controller.removeExtensions(searchResultCopy); } private static isICD10Code(str: string): boolean { - return ICD10Controller.icdRegex.test(str); + return ICD10Controller.icd10Regex.test(str); } private static filterCodes(terms: string[]): string[] { diff --git a/src/express.ts b/src/express.ts new file mode 100644 index 0000000..fdff336 --- /dev/null +++ b/src/express.ts @@ -0,0 +1,10 @@ +import express from "express"; +import ICD10Router from "@/routers/icd10"; + +const app = express(); + +app.use(express.json()); + +app.use("/api/icd10", ICD10Router); + +export default app; diff --git a/src/logger.ts b/src/logger.ts index 56046e1..9e95641 100644 --- a/src/logger.ts +++ b/src/logger.ts @@ -1,4 +1,3 @@ - const logger = console; export default logger; diff --git a/src/model/ICD10gmCodesystem.ts b/src/model/icd10CodeSystem.ts similarity index 90% rename from src/model/ICD10gmCodesystem.ts rename to src/model/icd10CodeSystem.ts index b90f520..8cd05b0 100644 --- a/src/model/ICD10gmCodesystem.ts +++ b/src/model/icd10CodeSystem.ts @@ -3,22 +3,15 @@ import io from "io-ts"; import icd10gm from "@/data/codesystem_icd10_gm_2020.json"; import logger from "@/logger"; -export default class ICD10gm { - public static instance: ICD10gm; +class ICD10gm { public codesystem: R4.ICodeSystem; public processedCodesystem: R4.ICodeSystem; - private constructor() { + public constructor() { this.codesystem = ICD10gm.initCodesystem(); this.processedCodesystem = ICD10gm.preProcessCodeSystem(this.codesystem); } - public static getInstance(): 
ICD10gm { - if (!ICD10gm.instance) ICD10gm.instance = new ICD10gm(); - logger.info("Loading and prefiltering ICD10gm Codesystem succeded"); - return ICD10gm.instance; - } - private static initCodesystem(): R4.ICodeSystem { const icd10gmDecoded = R4.RTTI_CodeSystem.decode(icd10gm); @@ -85,3 +78,5 @@ export default class ICD10gm { return concept; } } + +export default new ICD10gm(); diff --git a/src/routers/icd10.ts b/src/routers/icd10.ts new file mode 100644 index 0000000..c324dcb --- /dev/null +++ b/src/routers/icd10.ts @@ -0,0 +1,21 @@ +import express from "express"; +import { ICD10Controller } from "@/controller/icd10"; +import HTTPError from "@/utils/HTTPError"; + +const router: express.Router = express.Router(); + +router.get("/", async (req: express.Request, res: express.Response) => { + if (!req.query.search) + return res.status(400).send("Request is missing a query parameter 'search'.").end(); + + try { + const icd10Codes = ICD10Controller.getFiltered(req.query.search as string); + return res.status(200).send(icd10Codes); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + } catch (e: any) { + if (e instanceof HTTPError) res.status(e.status).send(e.message); + else res.status(500).send(e.message); + } +}); + +export default router; diff --git a/src/routes/icd10.ts b/src/routes/icd10.ts deleted file mode 100644 index e29e48d..0000000 --- a/src/routes/icd10.ts +++ /dev/null @@ -1,18 +0,0 @@ -import express from "express"; -import { ICD10Controller } from "@/controller/icd10Controller"; - -const router: express.Router = express.Router(); - -router.get("/", async (req: express.Request, res: express.Response) => { - if (req.query.search) { - try { - const icd10Res = ICD10Controller.getFiltered(req.query.search as string); - return res.status(200).send(icd10Res); - } catch (e) { - return res.status(500).send(e.message); - } - } - return res.status(400).send("no query component 'search' present").end(); -}); - -export default router; diff --git 
a/src/server.ts b/src/server.ts index 0cb16a1..346a332 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,14 +1,11 @@ -import express from "express"; +import { Express } from "express"; +import express from "@/express"; import bodyParser from "body-parser"; import cors from "cors"; import * as Sentry from "@sentry/node"; import * as Tracing from "@sentry/tracing"; -import icd10Router from "@/routes/icd10"; -import ICD10gm from "@/model/ICD10gmCodesystem"; -import logger from "@/logger"; - -class Icd10Api { +export default class Server { private static get port(): string { return process.env.PORT || "3000"; } @@ -17,43 +14,43 @@ class Icd10Api { return !!process.env.SENTRY_DSN && !!process.env.SENTRY_ENVIRONMENT; } - private async startApiServer() { - const app: express.Application = express(); - - if (Icd10Api.sentryIsEnabled) { - Sentry.init({ - dsn: process.env.SENTRY_DSN, - integrations: [ - new Sentry.Integrations.Http({ tracing: true }), - new Tracing.Integrations.Express({ app }), - ], - tracesSampleRate: 1.0, - environment: process.env.SENTRY_ENVIRONMENT, - }); - - app.use(Sentry.Handlers.requestHandler()); - app.use(Sentry.Handlers.tracingHandler()); - } - - app.use(bodyParser.urlencoded({ extended: true })); - app.use(bodyParser.json()); - app.use(cors()); - - app.use("/api/icd10", icd10Router); - - if (Icd10Api.sentryIsEnabled) { - app.use(Sentry.Handlers.errorHandler()); - } - - app.listen(Icd10Api.port, () => { - logger.info(`Server listening on ${Icd10Api.port}`); + private static enableSentry(app: Express) { + if (!Server.sentryIsEnabled) return; + Sentry.init({ + dsn: process.env.SENTRY_DSN, + integrations: [ + new Sentry.Integrations.Http({ tracing: true }), + new Tracing.Integrations.Express({ app }), + ], + tracesSampleRate: 1.0, + environment: process.env.SENTRY_ENVIRONMENT, }); + + app.use(Sentry.Handlers.requestHandler()); + app.use(Sentry.Handlers.tracingHandler()); + app.use(Sentry.Handlers.errorHandler()); + } + + private static 
setDefaultEnvironmentVariables() { + process.env.MAX_SEARCH_WORDS = process.env.MAX_SEARCH_WORDS ?? "6"; } constructor() { - ICD10gm.getInstance(); this.startApiServer(); } + + private async startApiServer() { + express.use(bodyParser.urlencoded({ extended: true })); + express.use(bodyParser.json()); + express.use(cors()); + + Server.setDefaultEnvironmentVariables(); + Server.enableSentry(express); + + express.listen(Server.port, () => { + console.log(`Server is listening on ${Server.port}`); + }); + } } -new Icd10Api(); +new Server(); diff --git a/src/services/codeFilter.ts b/src/services/codeFilter.ts index c490d15..2a17513 100644 --- a/src/services/codeFilter.ts +++ b/src/services/codeFilter.ts @@ -6,6 +6,7 @@ import FuseSearch from "@/services/fuseSearch"; export default class CodeFilter extends Filter { protected static keys: Fuse.FuseOptionKeyObject[] = [{ name: "code", weight: 1 }]; + protected static queryOptions: QueryOptions = { matchType: MatchType.exactMatch, logicalOperator: LogicalOperator.OR, diff --git a/src/services/filter.ts b/src/services/filter.ts index 3690d66..a5dc987 100644 --- a/src/services/filter.ts +++ b/src/services/filter.ts @@ -9,11 +9,11 @@ export default abstract class Filter extends FuseSearch { /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ public static initSearch(terms: string[]): Fuse.FuseResult[] { - throw new Error("Error: Called method 'search' on abstract class Filter."); + throw new Error("Error: Called method 'initSearch' on abstract class Filter."); } /* eslint-disable-next-line @typescript-eslint/no-unused-vars */ protected static getQuery(queryStr: string[] | string): void { - throw new Error("Error: Called method 'setQuery' on abstract class Filter."); + throw new Error("Error: Called method 'getQuery' on abstract class Filter."); } } diff --git a/src/services/fuseSearch.ts b/src/services/fuseSearch.ts index c74c98b..adda264 100644 --- a/src/services/fuseSearch.ts +++ b/src/services/fuseSearch.ts @@ 
-1,4 +1,4 @@ -import ICD10gm from "@/model/ICD10gmCodesystem"; +import ICD10gm from "@/model/icd10CodeSystem"; import Fuse from "fuse.js"; import { ICodeSystem_Concept } from "@ahryman40k/ts-fhir-types/lib/R4"; import { QueryOptions } from "@/types/queryOptions"; @@ -21,8 +21,7 @@ export default class FuseSearch { keys: Fuse.FuseOptionKeyObject[], query: Fuse.Expression[] ): Fuse.FuseResult[] { - const icd10 = ICD10gm.getInstance(); - const base = icd10.processedCodesystem?.concept ?? []; + const base = ICD10gm.processedCodesystem?.concept ?? []; const options = FuseSearch.getOptions(keys); const index = Fuse.createIndex(keys, base); const fuse = new Fuse(base, options, index); diff --git a/src/services/textFilter.ts b/src/services/textFilter.ts index 823a7f7..ff4e7f8 100644 --- a/src/services/textFilter.ts +++ b/src/services/textFilter.ts @@ -9,6 +9,7 @@ export default class TextFilter extends Filter { { name: "extension.valueString", weight: 0.6 }, { name: "modifierExtension.valueString", weight: 0.4 }, ]; + protected static queryOptions: QueryOptions = { matchType: MatchType.fuzzy, logicalOperator: LogicalOperator.AND, diff --git a/src/utils/HTTPError.ts b/src/utils/HTTPError.ts new file mode 100644 index 0000000..46443b9 --- /dev/null +++ b/src/utils/HTTPError.ts @@ -0,0 +1,9 @@ +export default class HTTPError extends Error { + public status: number; + + constructor(message: string, status: number) { + super(message); + this.status = status; + } + } + \ No newline at end of file