Skip to content

Commit

Permalink
Merge pull request #127 from coldbox-modules/development
Browse files Browse the repository at this point in the history
Release Merge
  • Loading branch information
jclausen authored Jun 8, 2023
2 parents adc3fcc + ec44463 commit b63f9ac
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 9 deletions.
2 changes: 1 addition & 1 deletion box.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name":"Elasticsearch for the Coldbox Framework",
"author":"Ortus Solutions <[email protected]",
"location":"https://downloads.ortussolutions.com/ortussolutions/coldbox-modules/cbelasticsearch/@build.version@/[email protected]@[email protected]@.zip",
"version":"3.1.2",
"version":"3.1.3",
"slug":"cbelasticsearch",
"type":"modules",
"homepage":"https://cbelasticsearch.ortusbooks.com",
Expand Down
6 changes: 6 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

----
## [Unreleased]

## [3.2.0] - 06-08-2023
### Added
* [Added `getTermVectors` to SearchBuilder and Client](https://cbelasticsearch.ortusbooks.com/searching/search#term-vectors) to allow for fetching term vectors on document field(s)

## [3.1.2] - 05-09-2023
### Fixed
* Added additional error handling and failover to Logstash appender to prevent ES communication from failing to bring an application online
Expand Down
75 changes: 67 additions & 8 deletions docs/Searching/Search.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,20 @@ To output the results of our search, we would use a loop, accessing the `Documen

```js
for( var resultDocument in searchResults.getHits() ){
var resultScore = resultDocument.getScore();
var documentMemento = resultDocument.getMemento();
var bookName = documentMemento.name;
var bookDescription = documentMemento.description;
var resultScore = resultDocument.getScore();
var documentMemento = resultDocument.getMemento();
var bookName = documentMemento.name;
var bookDescription = documentMemento.description;
}
```

The "memento" is our structural representation of the document. We can also use the built-in method of the Document object:

```js
for( var resultDocument in searchResults.getHits() ){
var resultScore = resultDocument.getScore();
var bookName = resultDocument.getValue( "name" );
var bookDescription = resultDoument.getValue( "description" );
var resultScore = resultDocument.getScore();
var bookName = resultDocument.getValue( "name" );
var bookDescription = resultDocument.getValue( "description" );
}
```

Expand Down Expand Up @@ -249,7 +249,7 @@ var response = getInstance( "SearchBuilder@cbElasticsearch" )
// Body parameter: return a relevance score for each document, despite our custom sort
.bodyParam( "track_scores", true )
// Body parameter: filter by minimum relevance score
.bodyParam( "min_score", 3 )
.bodyParam( "min_score", 3 )
// run the search
.execute();
```
Expand Down Expand Up @@ -367,6 +367,65 @@ var terms = getInstance( "HyperClient@cbElasticsearch" )
} );
```

## Term Vectors

The ["Term Vectors" Elasticsearch API](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html) allows you to retrieve information and statistics for terms in a specific document field. This could be useful for finding the most common term in a book description, or retrieving all terms with a minimum word length from the book title.

### Retrieving Term Vectors By Document ID

To retrieve term vectors for a known document ID, pass the index name, id, and an array or list of fields to pull from:

```js
var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors(
"books",
"book_12345",
[ "title" ]
);
```

You can fine-tune the request using the `options` argument:

```js
var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors(
indexName = "books",
id = "book_12345",
fields = [ "title" ],
options = {
"filter" : { "min_word_length" : 4 }
}
);
```

See the [query parameters](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-termvectors.html#docs-termvectors-api-query-params) documentation for more configuration options.

### Retrieving Term Vectors By Payload

If you wish to analyze a payload (not an existing document) you can pass a `"doc"` payload in the `options` argument:

```js
var result = getInstance( "HyperClient@cbElasticsearch" ).getTermVectors(
indexName = "books",
fields = [ "title" ],
options = {
"doc" : {
"title" : "The Lord of the Rings: The Fellowship of the Ring"
}
}
);
```

### SearchBuilder Term Vector Fetch

The SearchBuilder object also offers a `getTermVectors()` method for convenience:

```js
var result = getInstance( "SearchBuilder@cbElasticsearch" )
.new( "books" )
.getTermVectors(
myDocument._id,
[ "title", "author.name" ]
);
```

## `SearchBuilder` Function Reference

Expand Down
14 changes: 14 additions & 0 deletions models/SearchBuilder.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,20 @@ component accessors="true" {
return getClient().deleteByQuery( this );
}

/**
 * Fetches term vectors for a document in this builder's index by delegating to the client
 *
 * @id      Primary key of the document whose term vectors should be retrieved
 * @fields  Array or list of field names to retrieve term vectors for
 * @options Any custom parameters to send along with the request
 */
struct function getTermVectors( string id = "", any fields = "", struct options = {} ){
	// The builder supplies its own index; everything else is handed to the client as received
	return getClient().getTermVectors(
		indexName = variables.index,
		id        = arguments.id,
		fields    = arguments.fields,
		options   = arguments.options
	);
}

/**
* Backwards compatible setter for max result size
*
Expand Down
26 changes: 26 additions & 0 deletions models/io/HyperClient.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,32 @@ component accessors="true" threadSafe singleton {
.json();
}

/**
 * Request a vector of terms for the given index, document or document ID, and field names
 *
 * @indexName string Index name or alias.
 * @id        string Document ID to query term vectors on. When empty, no ID segment is appended to the endpoint.
 * @fields    any    Array or list of field names to pull term vectors on. When empty, a `fields` key supplied in `options` is used instead.
 * @options   struct Body payload to send. For example: `{ "filter": { "max_num_terms": 3 } }`
 *
 * @return struct The deserialized JSON response of the term vectors request
 */
struct function getTermVectors( required string indexName, string id = "", any fields = [], struct options = {} ){
	// Build the body on a shallow copy so the caller's options struct is never modified
	var payload = structCopy( arguments.options );

	// Normalize the fields argument: a list becomes an array, an array is used as-is
	var requestedFields = isArray( arguments.fields ) ? arguments.fields : listToArray( arguments.fields );

	if ( arrayLen( requestedFields ) ) {
		// An explicit fields argument takes precedence over any `fields` key in the options
		payload[ "fields" ] = requestedFields;
	} else if ( structKeyExists( payload, "fields" ) && !isArray( payload[ "fields" ] ) ) {
		// No fields argument given: keep the caller's options value, but send it as an array
		payload[ "fields" ] = listToArray( payload[ "fields" ] );
	}

	// {index}/_termvectors, with the document ID appended only when one was provided
	var endpoint = [ arguments.indexName, "_termvectors" ];
	if ( arguments.id != "" ) {
		endpoint.append( arguments.id );
	}

	return variables.nodePool
		.newRequest( arrayToList( endpoint, "/" ), "POST" )
		.setBody( getUtil().toJSON( payload ) )
		.send()
		.json();
}

/**
* Returns a struct containing all indices in the system, with statistics
*
Expand Down
57 changes: 57 additions & 0 deletions test-harness/tests/specs/unit/HyperClientTest.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,63 @@ component extends="coldbox.system.testing.BaseTestCase" {
expect( refreshResult._shards.total ).toBe( 0 );
} );

// Specs covering HyperClient.getTermVectors(), by stored document ID and by ad-hoc `doc` payload
describe( "termVectors", function() {
	it( "can get term vectors by document ID", function() {
		expect( variables ).toHaveKey( "testIndexName" );

		// create document and save
		var testDocument = {
			"_id" : createUUID(),
			"title" : "My Test Document",
			// HH = 24-hour clock; `hh` is the 12-hour clock and, with no AM/PM marker, gives wrong afternoon times
			"createdTime" : dateTimeFormat( now(), "yyyy-mm-dd'T'HH:nn:ssZZ" )
		};

		var document = getWirebox()
			.getInstance( "Document@cbElasticsearch" )
			.new(
				variables.testIndexName,
				"_doc",
				testDocument
			).save( refresh = true );

		// fields passed as a list
		var result = variables.model.getTermVectors(
			variables.testIndexName,
			testDocument._id,
			"title"
		);
		debug( result );
		expect( result.keyExists( "error" ) ).toBeFalse();
		expect( result.keyExists( "term_vectors" ) ).toBeTrue();
		expect( result.term_vectors ).toHaveKey( "title" );
		expect( result.term_vectors.title ).toBeStruct()
			.toHaveKey( "field_statistics" )
			.toHaveKey( "terms" );
	});
	it( "can get term vectors by doc payload", function(){
		expect( variables ).toHaveKey( "testIndexName" );

		// test options: no document ID, the text to analyze is supplied in the `doc` payload
		var result = variables.model.getTermVectors(
			indexName = variables.testIndexName,
			options = {
				"doc" : {
					"title" : "My test document"
				},
				"filter" : {
					"min_word_length" : 3
				}
			}
		);

		expect( result.keyExists( "error" ) ).toBeFalse();
		expect( result ).toHaveKey( "term_vectors" );

		// ensure terms shorter than min_word_length are filtered out
		expect( result.term_vectors.title.terms )
			.toHaveKey( "document" )
			.notToHaveKey( "my" );
	} );
});

it( "Tests getIndexStats method ", function(){
expect( variables ).toHaveKey( "testIndexName" );

Expand Down
63 changes: 63 additions & 0 deletions test-harness/tests/specs/unit/SearchBuilderTest.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,69 @@ component extends="coldbox.system.testing.BaseTestCase" {
expect( dsl.suggest[ completionNameTwo ].completion.field ).toBe( completionNameTwo );
} );
} );

// Specs covering SearchBuilder.getTermVectors(), by stored document ID and by ad-hoc `doc` payload
describe( "termVectors", function() {
	it( "can get term vectors by document ID", function() {
		expect( variables ).toHaveKey( "testIndexName" );

		// create document and save
		var testDocument = {
			"_id" : createUUID(),
			"title" : "My Test Document",
			// HH = 24-hour clock; `hh` is the 12-hour clock and, with no AM/PM marker, gives wrong afternoon times
			"createdTime" : dateTimeFormat( now(), "yyyy-mm-dd'T'HH:nn:ssZZ" )
		};

		var document = getWirebox()
			.getInstance( "Document@cbElasticsearch" )
			.new(
				variables.testIndexName,
				"testdocs",
				testDocument
			).save( refresh = true );
		// NOTE(review): the save above already requests a refresh, so this pause looks redundant - confirm before removing
		sleep(1000);
		var result = variables.model.new( variables.testIndexName )
			.getTermVectors(
				testDocument._id,
				"title"
			);

		expect( result.keyExists( "error" ) ).toBeFalse();
		expect( result.keyExists( "term_vectors" ) ).toBeTrue();
		debug( result );
		expect( result.term_vectors ).toHaveKey( "title" );
		expect( result.term_vectors.title ).toBeStruct()
			.toHaveKey( "field_statistics" )
			.toHaveKey( "terms" );
	});
	it( "can get term vectors by doc payload", function(){
		expect( variables ).toHaveKey( "testIndexName" );

		// test options: no document ID, the text to analyze is supplied in the `doc` payload
		var result = variables.model
			.new( variables.testIndexName )
			.getTermVectors(
				fields = [ "title" ],
				options = {
					"field_statistics" : false,
					"payloads" : false,
					"doc" : {
						"title" : "My test document"
					},
					"filter" : {
						"min_word_length" : 3
					}
				}
			);

		expect( result.keyExists( "error" ) ).toBeFalse();
		expect( result ).toHaveKey( "term_vectors" );

		// ensure terms shorter than min_word_length are filtered out
		expect( result.term_vectors.title.terms )
			.toHaveKey( "document" )
			.notToHaveKey( "my" );
	} );
});
} );
}

Expand Down

0 comments on commit b63f9ac

Please sign in to comment.