From 81b065a0a0706bf9045f1385e98a817c2df0145f Mon Sep 17 00:00:00 2001
From: Amit S Namboothiry
Date: Wed, 20 Nov 2024 20:55:31 +0530
Subject: [PATCH] Add fuzzy search

---
Review notes (not part of the commit message):
* This copy was rebuilt from a whitespace-collapsed patch, so the indentation
  and blank-line placement of context lines are reconstructed; verify the
  hunks against the tree (or apply with `git apply --ignore-whitespace`).
* api/common/post-fuzzy-search.js was revised in review: detail lookups run
  in parallel, posts that no longer exist are skipped, and an empty query
  returns early. The blob id on its index line is the one from the original
  submission.

 api/common/post-fuzzy-search.js     | 94 +++++++++++++++++++++++++++++
 api/posts-service/list.js           | 30 +++++----
 api/posts-service/package-lock.json | 14 +++++
 api/posts-service/package.json      |  1 +
 4 files changed, 127 insertions(+), 12 deletions(-)
 create mode 100644 api/common/post-fuzzy-search.js

diff --git a/api/common/post-fuzzy-search.js b/api/common/post-fuzzy-search.js
new file mode 100644
index 00000000..942f2360
--- /dev/null
+++ b/api/common/post-fuzzy-search.js
@@ -0,0 +1,94 @@
+import Fuse from "fuse.js";
+import * as dynamoDbLib from "../libs/dynamodb-lib";
+
+const TABLE_NAME = "NaadanChords";
+const MAX_RESULTS = 15;
+const FUSE_OPTIONS = {
+  includeScore: true,
+  threshold: 0.5,
+  isCaseSensitive: false,
+  keys: ["postId"],
+};
+
+/**
+ * Fetch the listing attributes of a single post.
+ *
+ * @param {string} postId - Key of the post in the NaadanChords table.
+ * @returns {Promise<Object|undefined>} The post, or undefined when the
+ *   table has no item with that key.
+ */
+async function getPostDetails(postId) {
+  const params = {
+    TableName: TABLE_NAME,
+    ProjectionExpression:
+      "postId, category, createdAt, updatedAt, postType, title, userId",
+    Key: { postId },
+  };
+  const result = await dynamoDbLib.call("get", params);
+  return result.Item;
+}
+
+/**
+ * Collect the postId of every item whose postType is "POST", following
+ * LastEvaluatedKey until the index query has no more pages.
+ *
+ * @returns {Promise<Array<{postId: string}>>} One entry per post.
+ */
+async function getAllPostIds() {
+  const baseParams = {
+    TableName: TABLE_NAME,
+    IndexName: "postType-updatedAt-index",
+    KeyConditionExpression: "postType = :postType",
+    ExpressionAttributeValues: {
+      ":postType": "POST",
+    },
+    ScanIndexForward: false,
+    ProjectionExpression: "postId",
+  };
+
+  const postIds = [];
+  let exclusiveStartKey;
+  do {
+    // Build fresh params per page instead of mutating a shared object.
+    const params = exclusiveStartKey
+      ? { ...baseParams, ExclusiveStartKey: exclusiveStartKey }
+      : baseParams;
+    const data = await dynamoDbLib.call("query", params);
+    // Loop rather than push(...items): spreading a large page can exceed
+    // the engine's argument limit.
+    for (const item of data.Items ?? []) {
+      postIds.push(item);
+    }
+    exclusiveStartKey = data.LastEvaluatedKey;
+  } while (exclusiveStartKey);
+
+  return postIds;
+}
+
+/**
+ * Fuzzy-match a search query against post ids and load the matching posts.
+ *
+ * @param {string} query - Search text entered by the user.
+ * @returns {Promise<{Items: Object[]}>} Up to MAX_RESULTS posts, best match
+ *   first; Items is empty when nothing matches or the query is empty.
+ */
+export async function fuzzySearch(query) {
+  if (!query) {
+    return { Items: [] };
+  }
+
+  const fuse = new Fuse(await getAllPostIds(), FUSE_OPTIONS);
+  const matches = fuse.search(query, { limit: MAX_RESULTS });
+  if (matches.length === 0) {
+    return { Items: [] };
+  }
+
+  // Fetch in parallel; Promise.all keeps the results in Fuse ranking order.
+  const posts = await Promise.all(
+    matches.map(({ item }) => getPostDetails(item.postId))
+  );
+
+  // get yields no Item for a missing key (e.g. a post deleted after the
+  // index was read), so drop those instead of returning undefined entries.
+  return { Items: posts.filter(Boolean) };
+}
diff --git a/api/posts-service/list.js b/api/posts-service/list.js
index cedd831d..19517d36 100644
--- a/api/posts-service/list.js
+++ b/api/posts-service/list.js
@@ -3,6 +3,7 @@ import * as userNameLib from "../libs/username-lib";
 import * as searchFilterLib from "../libs/searchfilter-lib";
 import { appendRatings } from "../common/post-ratings";
 import { appendCommentsCount } from "../common/post-comments";
+import { fuzzySearch } from "../common/post-fuzzy-search";
 
 export async function main(event, context, callback) {
   var lastEvaluatedKey;
@@ -120,18 +121,23 @@ export async function main(event, context, callback) {
       result = await dynamoDbLib.call("scan", params);
 
       if (result?.Items?.length === 0) {
-        // No search results
-        const emptySearchWriteParams = {
-          TableName: "NaadanChordsEmptySearch",
-          Item: {
-            timestamp: Date.now(),
-            searchQuery: event.search,
-            ipAddress: event.sourceIP,
-            type: "SEARCH",
-          },
-        };
-
-        await dynamoDbLib.call("put", emptySearchWriteParams);
+        // Try fuzzy search over post ids
+        result = await fuzzySearch(event.search);
+
+        if (result?.Items?.length === 0) {
+          // No search results
+          const emptySearchWriteParams = {
+            TableName: "NaadanChordsEmptySearch",
+            Item: {
+              timestamp: Date.now(),
+              searchQuery: event.search,
+              ipAddress: event.sourceIP,
+              type: "SEARCH",
+            },
+          };
+
+          await dynamoDbLib.call("put", emptySearchWriteParams);
+        }
       }
     } else {
       result = await dynamoDbLib.call("query", params);
diff --git a/api/posts-service/package-lock.json b/api/posts-service/package-lock.json
index 89f7a804..bc831b14 100644
--- a/api/posts-service/package-lock.json
+++ b/api/posts-service/package-lock.json
@@ -10,6 +10,7 @@
       "license": "MIT",
       "dependencies": {
         "aws-sdk": "^2.1359.0",
+        "fuse.js": "^7.0.0",
         "serverless-domain-manager": "^5.1.0",
         "serverless-webpack": "^5.5.4",
         "webpack-node-externals": "^2.5.2"
@@ -3079,6 +3080,14 @@
       "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz",
       "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
     },
+    "node_modules/fuse.js": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
+      "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
+      "engines": {
+        "node": ">=10"
+      }
+    },
     "node_modules/gensync": {
       "version": "1.0.0-beta.2",
       "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
@@ -8692,6 +8701,11 @@
       "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.1.tgz",
       "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A=="
     },
+    "fuse.js": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
+      "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q=="
+    },
     "gensync": {
       "version": "1.0.0-beta.2",
       "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz",
diff --git a/api/posts-service/package.json b/api/posts-service/package.json
index 5290fa9c..1942d712 100644
--- a/api/posts-service/package.json
+++ b/api/posts-service/package.json
@@ -13,6 +13,7 @@
   },
   "dependencies": {
     "aws-sdk": "^2.1359.0",
+    "fuse.js": "^7.0.0",
     "serverless-domain-manager": "^5.1.0",
     "serverless-webpack": "^5.5.4",
     "webpack-node-externals": "^2.5.2"