From 81e4effdaebcf6e20d416cd9237faea35f02b74a Mon Sep 17 00:00:00 2001 From: jclausen Date: Tue, 9 May 2023 11:23:06 -0400 Subject: [PATCH 01/10] bump snapshot [ci skip] --- box.json | 2 +- changelog.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/box.json b/box.json index 164d3f6..f012b00 100644 --- a/box.json +++ b/box.json @@ -2,7 +2,7 @@ "name":"Elasticsearch for the Coldbox Framework", "author":"Ortus Solutions Date: Fri, 26 May 2023 21:31:48 -0400 Subject: [PATCH 02/10] =?UTF-8?q?=F0=9F=93=A6=20NEW:=20Add=20Term=20Vector?= =?UTF-8?q?=20endpoint=20support?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for Elasticsearch's "terms vector" API endpoint, which allows users to query for most common terms in specific document fields. https://www.elastic.co/guide/en/elasticsearch/reference/8.7/docs-termvectors.html --- models/io/HyperClient.cfc | 21 +++++++ .../tests/specs/unit/HyperClientTest.cfc | 57 +++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/models/io/HyperClient.cfc b/models/io/HyperClient.cfc index ed1ac9a..af116be 100644 --- a/models/io/HyperClient.cfc +++ b/models/io/HyperClient.cfc @@ -530,6 +530,27 @@ component accessors="true" threadSafe singleton { .json(); } + /** + * Request a vector of terms for the given index, document or document ID, and field names + * + * @indexName string Index name or alias. + * @params struct Struct of query parameters to influence the request. For example: `"offsets": false }` + * @body struct Body payload to send. 
For example: `{ "filter": { "max_num_terms": 3 } }` + */ + struct function getTermVectors( required string indexName, string id = "", struct params = {}, struct body = {} ){ + var endpoint = [arguments.indexName, "_termvectors" ]; + if ( arguments.id != "" ) { + endpoint.append( arguments.id ); + } + var vectorRequest = variables.nodePool.newRequest( arrayToList( endpoint, "/" ), "GET" ); + + return vectorRequest + .setBody( getUtil().toJSON( arguments.body ) ) + .withQueryParams( arguments.params ) + .send() + .json(); + } + /** * Returns a struct containing all indices in the system, with statistics * diff --git a/test-harness/tests/specs/unit/HyperClientTest.cfc b/test-harness/tests/specs/unit/HyperClientTest.cfc index bd1b135..af9856b 100644 --- a/test-harness/tests/specs/unit/HyperClientTest.cfc +++ b/test-harness/tests/specs/unit/HyperClientTest.cfc @@ -1007,6 +1007,63 @@ component extends="coldbox.system.testing.BaseTestCase" { expect( refreshResult._shards.total ).toBe( 0 ); } ); + describe( "termVectors", function() { + it( "can get term vectors by document ID", function() { + expect( variables ).toHaveKey( "testIndexName" ); + + // create document and save + var testDocument = { + "_id" : createUUID(), + "title" : "My Test Document", + "createdTime" : dateTimeFormat( now(), "yyyy-mm-dd'T'hh:nn:ssZZ" ) + }; + + var document = getWirebox() + .getInstance( "Document@cbElasticsearch" ) + .new( + variables.testIndexName, + "_doc", + testDocument + ).save( refresh = true ); + var result = variables.model.getTermVectors( + variables.testIndexName, + testDocument._id, + { "fields" : "title" } + ); + + expect( result.keyExists( "term_vectors" ) ).toBeTrue(); + expect( result.keyExists( "error" ) ).toBeFalse(); + expect( result.term_vectors ).toHaveKey( "title" ); + expect( result.term_vectors.title ).toBeStruct() + .toHaveKey( "field_statistics" ) + .toHaveKey( "terms" ); + }); + it( "can get term vectors by doc payload", function(){ + expect( variables 
).toHaveKey( "testIndexName" ); + + // test options + var result = variables.model.getTermVectors( + indexName = variables.testIndexName, + body = { + "doc" : { + "title" : "My test document" + }, + "filter" : { + "min_word_length" : 3 + } + } + ); + + expect( result.keyExists( "error" ) ).toBeFalse(); + expect( result ).toHaveKey( "term_vectors" ); + + // ensure terms shorter than min_word_length are filtered out + expect( result.term_vectors.title.terms ) + .toHaveKey( "document" ) + .notToHaveKey( "my" ); + } ); + }); + it( "Tests getIndexStats method ", function(){ expect( variables ).toHaveKey( "testIndexName" ); From fc49c5ec78bdfaaec49be43e666bbd32843eb71b Mon Sep 17 00:00:00 2001 From: Michael Born Date: Fri, 26 May 2023 21:54:26 -0400 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=93=96=20DOC:=20Add=20documentation?= =?UTF-8?q?=20for=20terms=20vector=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Searching/Search.md | 59 ++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/docs/Searching/Search.md b/docs/Searching/Search.md index 8c65525..73f00a3 100644 --- a/docs/Searching/Search.md +++ b/docs/Searching/Search.md @@ -19,10 +19,10 @@ To output the results of our search, we would use a loop, accessing the `Documen ```js for( var resultDocument in searchResults.getHits() ){ - var resultScore = resultDocument.getScore(); - var documentMemento = resultDocument.getMemento(); - var bookName = documentMemento.name; - var bookDescription = documentMemento.description; + var resultScore = resultDocument.getScore(); + var documentMemento = resultDocument.getMemento(); + var bookName = documentMemento.name; + var bookDescription = documentMemento.description; } ``` @@ -30,9 +30,9 @@ The "memento" is our structural representation of the document. 
We can also use ```js for( var resultDocument in searchResults.getHits() ){ - var resultScore = resultDocument.getScore(); - var bookName = resultDocument.getValue( "name" ); - var bookDescription = resultDoument.getValue( "description" ); + var resultScore = resultDocument.getScore(); + var bookName = resultDocument.getValue( "name" ); + var bookDescription = resultDoument.getValue( "description" ); } ``` @@ -249,7 +249,7 @@ var response = getInstance( "SearchBuilder@cbElasticsearch" ) // Body parameter: return a relevance score for each document, despite our custom sort .bodyParam( "track_scores", true ); // Body parameter: filter by minimum relevance score - .bodyParam( "min_score", 3 ) + .bodyParam( "min_score", 3 ) // run the search .execute(); ``` @@ -367,6 +367,49 @@ var terms = getInstance( "HyperClient@cbElasticsearch" ) } ); ``` +## Term Vectors + +The ["Term Vectors" Elasticsearch API](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html) allows you to retrieve information and statistics for terms in a specific document field. This could be useful for finding the most common term in a book description, or retrieving all terms with a minimum word length from the book title. 
+ +### Retrieving Term Vectors By Document ID + +To retrieve term vectors for a known document ID: + +```js +var result = variables.model.getTermVectors( + "books", + "book_12345", + { "fields" : "title" } +); +``` + +Use the third argument, `params`, to configure the request using the [documented query parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-query-params): + +```js +var result = variables.model.getTermVectors( + indexName = "books", + id = "book_12345", + params = { + "fields" : "title", + "min_word_length" : 4 + } +); +``` + +### Retrieving Term Vectors By Payload + +If you wish to analyze a payload (not an existing document) you can pass a payload in the `body` argument's `"doc"` field: + +```js +var result = variables.model.getTermVectors( + indexName = "books", + body = { + "doc" : { + "title" : "The Lord of the Rings: The Fellowship of the Ring" + } + } +); +``` ## `SearchBuilder` Function Reference From 9c3c1c839b6cc1692d70901ebec53db6bbbc1415 Mon Sep 17 00:00:00 2001 From: Michael Born Date: Mon, 5 Jun 2023 09:13:46 -0400 Subject: [PATCH 04/10] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20Switch=20term=20vec?= =?UTF-8?q?tors=20to=20POST=20request=20due=20to=20request=20body?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes failing test on ACF, evidently because ACF does not support a request body on GET requests. 
--- models/io/HyperClient.cfc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/io/HyperClient.cfc b/models/io/HyperClient.cfc index af116be..ef327ee 100644 --- a/models/io/HyperClient.cfc +++ b/models/io/HyperClient.cfc @@ -542,7 +542,7 @@ component accessors="true" threadSafe singleton { if ( arguments.id != "" ) { endpoint.append( arguments.id ); } - var vectorRequest = variables.nodePool.newRequest( arrayToList( endpoint, "/" ), "GET" ); + var vectorRequest = variables.nodePool.newRequest( arrayToList( endpoint, "/" ), "POST" ); return vectorRequest .setBody( getUtil().toJSON( arguments.body ) ) From 4a1cfe4e0c8afa23f7005c8d5e642ae1c02b2f5e Mon Sep 17 00:00:00 2001 From: Michael Born Date: Wed, 7 Jun 2023 12:00:35 -0400 Subject: [PATCH 05/10] =?UTF-8?q?=F0=9F=93=A6=20NEW:=20Add=20searchBuilder?= =?UTF-8?q?=20passthrough=20method=20for=20term=20vectors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Searching/Search.md | 39 +++++++++++- models/SearchBuilder.cfc | 25 ++++++++ .../tests/specs/unit/HyperClientTest.cfc | 2 +- .../tests/specs/unit/SearchBuilderTest.cfc | 62 +++++++++++++++++++ 4 files changed, 124 insertions(+), 4 deletions(-) diff --git a/docs/Searching/Search.md b/docs/Searching/Search.md index 73f00a3..01552ee 100644 --- a/docs/Searching/Search.md +++ b/docs/Searching/Search.md @@ -376,7 +376,7 @@ The ["Term Vectors" Elasticsearch API](https://www.elastic.co/guide/en/elasticse To retrieve term vectors for a known document ID: ```js -var result = variables.model.getTermVectors( +var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( "books", "book_12345", { "fields" : "title" } @@ -386,7 +386,7 @@ var result = variables.model.getTermVectors( Use the third argument, `params`, to configure the request using the [documented query 
parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-query-params): ```js -var result = variables.model.getTermVectors( +var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( indexName = "books", id = "book_12345", params = { @@ -401,7 +401,7 @@ var result = variables.model.getTermVectors( If you wish to analyze a payload (not an existing document) you can pass a payload in the `body` argument's `"doc"` field: ```js -var result = variables.model.getTermVectors( +var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( indexName = "books", body = { "doc" : { @@ -411,6 +411,39 @@ var result = variables.model.getTermVectors( ); ``` +### SearchBuilder Term Vector Fetch + +The SearchBuilder object also offers a `getTermVectors()` method with a more fluent argument syntax: + +```js +var result = getInstance( "SearchBuilder@cbElasticsearch" ) + .new( "books" ) + .getTermVectors( + myDocument._id, + "title,author.name" + ); +``` + +or pass a struct of options for more fine-grained term vector retrieval: + +```js +var result = getInstance( "SearchBuilder@cbElasticsearch" ) + .new( "books" ) + .getTermVectors( + myDocument._id, + "title,author.name", + { + "field_statistics" : false, + "payloads" : false, + "filter" : { + "min_term_freq": 1, + "min_word_length" : "4" + } + } + ); +``` + + ## `SearchBuilder` Function Reference * `new([string index], [string type], [struct properties])` - Populates a new SearchBuilder object. diff --git a/models/SearchBuilder.cfc b/models/SearchBuilder.cfc index 21734fd..f855da6 100644 --- a/models/SearchBuilder.cfc +++ b/models/SearchBuilder.cfc @@ -170,6 +170,31 @@ component accessors="true" { return getClient().deleteByQuery( this ); } + /** + * Request a vector of terms for the given index, document or document ID, and field names + * + * @params struct Struct of query parameters to influence the request. 
For example: `"offsets": false }` + * @body struct Body payload to send. For example: `{ "filter": { "max_num_terms": 3 } }` + */ + struct function getTermVectors( string id = "", string fields = "", struct options = {} ){ + var args = { + indexName = variables.index, + id = arguments.id, + params = arguments.options, + body = {} + }; + arguments.options[ "fields" ] = arguments.fields; + if ( arguments.options.keyExists( "doc" ) ){ + args.body[ "doc" ] = arguments.options.doc; + arguments.options.delete( "doc" ); + } + if ( arguments.options.keyExists( "filter" ) ){ + args.body[ "filter" ] = arguments.options.filter; + arguments.options.delete( "filter" ); + } + return getClient().getTermVectors( argumentCollection = args ); + } + /** * Backwards compatible setter for max result size * diff --git a/test-harness/tests/specs/unit/HyperClientTest.cfc b/test-harness/tests/specs/unit/HyperClientTest.cfc index af9856b..c4fda75 100644 --- a/test-harness/tests/specs/unit/HyperClientTest.cfc +++ b/test-harness/tests/specs/unit/HyperClientTest.cfc @@ -1031,8 +1031,8 @@ component extends="coldbox.system.testing.BaseTestCase" { { "fields" : "title" } ); - expect( result.keyExists( "term_vectors" ) ).toBeTrue(); expect( result.keyExists( "error" ) ).toBeFalse(); + expect( result.keyExists( "term_vectors" ) ).toBeTrue(); expect( result.term_vectors ).toHaveKey( "title" ); expect( result.term_vectors.title ).toBeStruct() .toHaveKey( "field_statistics" ) diff --git a/test-harness/tests/specs/unit/SearchBuilderTest.cfc b/test-harness/tests/specs/unit/SearchBuilderTest.cfc index 7ae62d9..d84bd12 100644 --- a/test-harness/tests/specs/unit/SearchBuilderTest.cfc +++ b/test-harness/tests/specs/unit/SearchBuilderTest.cfc @@ -1271,6 +1271,68 @@ component extends="coldbox.system.testing.BaseTestCase" { expect( dsl.suggest[ completionNameTwo ].completion.field ).toBe( completionNameTwo ); } ); } ); + + describe( "termVectors", function() { + it( "can get term vectors by document ID", 
function() { + expect( variables ).toHaveKey( "testIndexName" ); + + // create document and save + var testDocument = { + "_id" : createUUID(), + "title" : "My Test Document", + "createdTime" : dateTimeFormat( now(), "yyyy-mm-dd'T'hh:nn:ssZZ" ) + }; + + var document = getWirebox() + .getInstance( "Document@cbElasticsearch" ) + .new( + variables.testIndexName, + "testdocs", + testDocument + ).save( refresh = true ); + sleep(1000); + var result = variables.model.new( variables.testIndexName ) + .getTermVectors( + testDocument._id, + "title" + ); + + expect( result.keyExists( "error" ) ).toBeFalse(); + expect( result.keyExists( "term_vectors" ) ).toBeTrue(); + debug( result ); + expect( result.term_vectors ).toHaveKey( "title" ); + expect( result.term_vectors.title ).toBeStruct() + .toHaveKey( "field_statistics" ) + .toHaveKey( "terms" ); + }); + it( "can get term vectors by doc payload", function(){ + expect( variables ).toHaveKey( "testIndexName" ); + + // test options + var result = variables.model + .new( variables.testIndexName ) + .getTermVectors( + options = { + "field_statistics" : false, + "payloads" : false, + "doc" : { + "title" : "My test document" + }, + "filter" : { + "min_word_length" : 3 + } + } + ); + + expect( result.keyExists( "error" ) ).toBeFalse(); + expect( result ).toHaveKey( "term_vectors" ); + + // ensure terms shorter than min_word_length are filtered out + expect( result.term_vectors.title.terms ) + .toHaveKey( "document" ) + .notToHaveKey( "my" ); + } ); + }); } ); } From 992bcaa37839ff8d84cb253b28ca3cad7b088db8 Mon Sep 17 00:00:00 2001 From: Michael Born Date: Wed, 7 Jun 2023 14:23:13 -0400 Subject: [PATCH 06/10] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Allow=20array?= =?UTF-8?q?=20of=20fields=20in=20getTermVectors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/SearchBuilder.cfc | 10 +++++++--- models/io/HyperClient.cfc | 1 + test-harness/tests/specs/unit/SearchBuilderTest.cfc | 1 + 3 files changed, 9 
insertions(+), 3 deletions(-) diff --git a/models/SearchBuilder.cfc b/models/SearchBuilder.cfc index f855da6..7cd2fb5 100644 --- a/models/SearchBuilder.cfc +++ b/models/SearchBuilder.cfc @@ -173,16 +173,20 @@ component accessors="true" { /** * Request a vector of terms for the given index, document or document ID, and field names * - * @params struct Struct of query parameters to influence the request. For example: `"offsets": false }` - * @body struct Body payload to send. For example: `{ "filter": { "max_num_terms": 3 } }` + * @id Primary key of a document to query term vectors on + * @fields Array or list of fields to pull term vectors on + * @options Any custom query or body parameters. */ - struct function getTermVectors( string id = "", string fields = "", struct options = {} ){ + struct function getTermVectors( string id = "", any fields = "", struct options = {} ){ var args = { indexName = variables.index, id = arguments.id, params = arguments.options, body = {} }; + if ( isArray( arguments.fields ) ) { + arguments.fields = arrayToList( arguments.fields ); + } arguments.options[ "fields" ] = arguments.fields; if ( arguments.options.keyExists( "doc" ) ){ args.body[ "doc" ] = arguments.options.doc; diff --git a/models/io/HyperClient.cfc b/models/io/HyperClient.cfc index ef327ee..3e94bb8 100644 --- a/models/io/HyperClient.cfc +++ b/models/io/HyperClient.cfc @@ -534,6 +534,7 @@ component accessors="true" threadSafe singleton { * Request a vector of terms for the given index, document or document ID, and field names * * @indexName string Index name or alias. + * @id string Document ID to query term vectors on. * @params struct Struct of query parameters to influence the request. For example: `"offsets": false }` * @body struct Body payload to send. 
For example: `{ "filter": { "max_num_terms": 3 } }` */ diff --git a/test-harness/tests/specs/unit/SearchBuilderTest.cfc b/test-harness/tests/specs/unit/SearchBuilderTest.cfc index d84bd12..f5e0d34 100644 --- a/test-harness/tests/specs/unit/SearchBuilderTest.cfc +++ b/test-harness/tests/specs/unit/SearchBuilderTest.cfc @@ -1312,6 +1312,7 @@ component extends="coldbox.system.testing.BaseTestCase" { var result = variables.model .new( variables.testIndexName ) .getTermVectors( + fields = [ "title" ], options = { "field_statistics" : false, "payloads" : false, From 6f69d176762f275edff369c3f3edf60ead7ef5ad Mon Sep 17 00:00:00 2001 From: Michael Born Date: Thu, 8 Jun 2023 11:50:35 -0400 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Send=20fields?= =?UTF-8?q?=20as=20array=20in=20body=20for=20getTermVectors()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- models/SearchBuilder.cfc | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/models/SearchBuilder.cfc b/models/SearchBuilder.cfc index 7cd2fb5..eee4a94 100644 --- a/models/SearchBuilder.cfc +++ b/models/SearchBuilder.cfc @@ -175,27 +175,20 @@ component accessors="true" { * * @id Primary key of a document to query term vectors on * @fields Array or list of fields to pull term vectors on - * @options Any custom query or body parameters. + * @options Any custom parameters to send with the request. 
*/ struct function getTermVectors( string id = "", any fields = "", struct options = {} ){ + if ( !isArray( arguments.fields ) ) { + arguments.fields = listToArray( arguments.fields ); + } var args = { indexName = variables.index, id = arguments.id, - params = arguments.options, - body = {} + params = {}, + body = arguments.options }; - if ( isArray( arguments.fields ) ) { - arguments.fields = arrayToList( arguments.fields ); - } - arguments.options[ "fields" ] = arguments.fields; - if ( arguments.options.keyExists( "doc" ) ){ - args.body[ "doc" ] = arguments.options.doc; - arguments.options.delete( "doc" ); - } - if ( arguments.options.keyExists( "filter" ) ){ - args.body[ "filter" ] = arguments.options.filter; - arguments.options.delete( "filter" ); - } + args.body[ "fields" ] = arguments.fields; + return getClient().getTermVectors( argumentCollection = args ); } From 2992160b4c2f838667251ec44d9c2770f65b3873 Mon Sep 17 00:00:00 2001 From: Michael Born Date: Thu, 8 Jun 2023 12:06:06 -0400 Subject: [PATCH 08/10] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20GetTermVectors(?= =?UTF-8?q?)=20improvements=20per=20Jon?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Searching/Search.md | 39 ++++++------------- models/SearchBuilder.cfc | 12 +----- models/io/HyperClient.cfc | 12 ++++-- .../tests/specs/unit/HyperClientTest.cfc | 6 +-- 4 files changed, 24 insertions(+), 45 deletions(-) diff --git a/docs/Searching/Search.md b/docs/Searching/Search.md index 01552ee..ce3e21b 100644 --- a/docs/Searching/Search.md +++ b/docs/Searching/Search.md @@ -373,37 +373,40 @@ The ["Term Vectors" Elasticsearch API](https://www.elastic.co/guide/en/elasticse ### Retrieving Term Vectors By Document ID -To retrieve term vectors for a known document ID: +To retrieve term vectors for a known document ID, pass the index name, id, and an array or list of fields to pull from: ```js var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( 
"books", "book_12345", - { "fields" : "title" } + [ "title" ] ); ``` -Use the third argument, `params`, to configure the request using the [documented query parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-query-params): +You can fine-tune the request using the `options` argument: ```js var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( indexName = "books", id = "book_12345", - params = { + options = { "fields" : "title", "min_word_length" : 4 } ); ``` +See the [query parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-query-params) documentation for more configuration options. + ### Retrieving Term Vectors By Payload -If you wish to analyze a payload (not an existing document) you can pass a payload in the `body` argument's `"doc"` field: +If you wish to analyze a payload (not an existing document) you can pass a `"doc"` payload in the `options` argument: ```js var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( indexName = "books", - body = { + fields = [ "title" ], + options = { "doc" : { "title" : "The Lord of the Rings: The Fellowship of the Ring" } @@ -413,37 +416,17 @@ var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors( ### SearchBuilder Term Vector Fetch -The SearchBuilder object also offers a `getTermVectors()` method with a more fluent argument syntax: - -```js -var result = getInstance( "SearchBuilder@cbElasticsearch" ) - .new( "books" ) - .getTermVectors( - myDocument._id, - "title,author.name" - ); -``` - -or pass a struct of options for more fine-grained term vector retrieval: +The SearchBuilder object also offers a `getTermVectors()` method for convenience: ```js var result = getInstance( "SearchBuilder@cbElasticsearch" ) .new( "books" ) .getTermVectors( myDocument._id, - "title,author.name", - { - "field_statistics" : false, - "payloads" : false, - 
"filter" : { - "min_term_freq": 1, - "min_word_length" : "4" - } - } + [ "title,author.name" ] ); ``` - ## `SearchBuilder` Function Reference * `new([string index], [string type], [struct properties])` - Populates a new SearchBuilder object. diff --git a/models/SearchBuilder.cfc b/models/SearchBuilder.cfc index eee4a94..8af1dbc 100644 --- a/models/SearchBuilder.cfc +++ b/models/SearchBuilder.cfc @@ -178,16 +178,8 @@ component accessors="true" { * @options Any custom parameters to send with the request. */ struct function getTermVectors( string id = "", any fields = "", struct options = {} ){ - if ( !isArray( arguments.fields ) ) { - arguments.fields = listToArray( arguments.fields ); - } - var args = { - indexName = variables.index, - id = arguments.id, - params = {}, - body = arguments.options - }; - args.body[ "fields" ] = arguments.fields; + var args = arguments; + args.indexName = variables.index; return getClient().getTermVectors( argumentCollection = args ); } diff --git a/models/io/HyperClient.cfc b/models/io/HyperClient.cfc index 3e94bb8..b9f3e5b 100644 --- a/models/io/HyperClient.cfc +++ b/models/io/HyperClient.cfc @@ -536,9 +536,14 @@ component accessors="true" threadSafe singleton { * @indexName string Index name or alias. * @id string Document ID to query term vectors on. * @params struct Struct of query parameters to influence the request. For example: `"offsets": false }` - * @body struct Body payload to send. For example: `{ "filter": { "max_num_terms": 3 } }` + * @options struct Body payload to send. 
For example: `{ "filter": { "max_num_terms": 3 } }` */ - struct function getTermVectors( required string indexName, string id = "", struct params = {}, struct body = {} ){ + struct function getTermVectors( required string indexName, string id = "", any fields = [], struct options = {} ){ + arguments.options[ "fields" ] = arguments.fields; + if ( !isArray( arguments.options["fields"] ) ) { + arguments.options["fields"] = listToArray( arguments.options["fields"] ); + } + var endpoint = [arguments.indexName, "_termvectors" ]; if ( arguments.id != "" ) { endpoint.append( arguments.id ); @@ -546,8 +551,7 @@ component accessors="true" threadSafe singleton { var vectorRequest = variables.nodePool.newRequest( arrayToList( endpoint, "/" ), "POST" ); return vectorRequest - .setBody( getUtil().toJSON( arguments.body ) ) - .withQueryParams( arguments.params ) + .setBody( getUtil().toJSON( arguments.options ) ) .send() .json(); } diff --git a/test-harness/tests/specs/unit/HyperClientTest.cfc b/test-harness/tests/specs/unit/HyperClientTest.cfc index c4fda75..714b21e 100644 --- a/test-harness/tests/specs/unit/HyperClientTest.cfc +++ b/test-harness/tests/specs/unit/HyperClientTest.cfc @@ -1028,9 +1028,9 @@ component extends="coldbox.system.testing.BaseTestCase" { var result = variables.model.getTermVectors( variables.testIndexName, testDocument._id, - { "fields" : "title" } + "title" ); - +debug( result ); expect( result.keyExists( "error" ) ).toBeFalse(); expect( result.keyExists( "term_vectors" ) ).toBeTrue(); expect( result.term_vectors ).toHaveKey( "title" ); @@ -1044,7 +1044,7 @@ component extends="coldbox.system.testing.BaseTestCase" { // test options var result = variables.model.getTermVectors( indexName = variables.testIndexName, - body = { + options = { "doc" : { "title" : "My test document" }, From 477dbc9739443fcf516da2f325d1268276c69ccf Mon Sep 17 00:00:00 2001 From: Michael Born Date: Thu, 8 Jun 2023 13:29:40 -0400 Subject: [PATCH 09/10] Add 3.2.0 changelog --- 
changelog.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog.md b/changelog.md index 417a8a0..addfb22 100644 --- a/changelog.md +++ b/changelog.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ---- ##[Unreleased] +## [3.2.0] - 06-08-2023 +### Added +* [Added `getTermsVector` to SearchBuilder and Client](https://cbelasticsearch.ortusbooks.com/searching/search#term-vectors) to allow for fetching term vectors on document field(s) + ## [3.1.2] - 05-09-2023 ### Fixed * Added additional error handling and failover to Logstash appender to prevent ES communication from failing to bring an application online From ec44463946b9bc902f4059b0ac0fa574ee51a0c1 Mon Sep 17 00:00:00 2001 From: Michael Born Date: Thu, 8 Jun 2023 13:31:54 -0400 Subject: [PATCH 10/10] Fix typo in 3.2.0 changelog note for "getTermVectors" --- changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index addfb22..2222e56 100644 --- a/changelog.md +++ b/changelog.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [3.2.0] - 06-08-2023 ### Added -* [Added `getTermsVector` to SearchBuilder and Client](https://cbelasticsearch.ortusbooks.com/searching/search#term-vectors) to allow for fetching term vectors on document field(s) +* [Added `getTermVectors` to SearchBuilder and Client](https://cbelasticsearch.ortusbooks.com/searching/search#term-vectors) to allow for fetching term vectors on document field(s) ## [3.1.2] - 05-09-2023 ### Fixed