From 1259263fe4c3b42f40945d4b3f5b7055275ba067 Mon Sep 17 00:00:00 2001 From: "david.blasby" Date: Wed, 9 Oct 2024 10:44:17 -0700 Subject: [PATCH] search via queryables --- .gitignore | 3 + gn_checks.xml | 332 ++++++++++++++++ .../records/controller/ItemApiController.java | 83 ++-- .../ogcapi/records/controller/Query.java | 241 ++++++++++++ .../controller/QueryableApiController.java | 10 +- .../ogcapi/records/model/GnElasticInfo.java | 148 ++++++++ .../geonet/ogcapi/records/model/JsonItem.java | 5 + .../ogcapi/records/model/JsonProperty.java | 53 ++- .../ogcapi/records/model/JsonSchema.java | 8 +- .../ogcapi/records/service/QueryBuilder.java | 115 ++++++ .../records/service/QueryToElastic.java | 356 ++++++++++++++++++ .../records/service/QueryablesService.java | 84 ++++- .../util/ElasticIndexJson2CollectionInfo.java | 5 + .../records/util/RecordsEsQueryBuilder.java | 53 ++- .../main/resources/queryables/queryables.json | 277 ++++++++++++++ pom.xml | 2 +- 16 files changed, 1677 insertions(+), 98 deletions(-) create mode 100644 gn_checks.xml create mode 100644 modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/Query.java create mode 100644 modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/GnElasticInfo.java create mode 100644 modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryBuilder.java create mode 100644 modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryToElastic.java create mode 100644 modules/services/ogc-api-records/src/main/resources/queryables/queryables.json diff --git a/.gitignore b/.gitignore index 512e9d5d..82017323 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,6 @@ build/ ### VS Code ### .vscode/ + +## OS X +.DS_Store diff --git a/gn_checks.xml b/gn_checks.xml new file mode 100644 index 00000000..25218acf --- /dev/null +++ b/gn_checks.xml @@ -0,0 +1,332 @@ + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/ItemApiController.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/ItemApiController.java index 57cea862..26f26c59 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/ItemApiController.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/ItemApiController.java @@ -5,10 +5,7 @@ package org.fao.geonet.ogcapi.records.controller; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; import io.swagger.annotations.Api; import io.swagger.annotations.ApiParam; import io.swagger.v3.oas.annotations.responses.ApiResponse; @@ -54,6 +51,7 @@ import org.fao.geonet.ogcapi.records.model.Item; import org.fao.geonet.ogcapi.records.model.XsltModel; import org.fao.geonet.ogcapi.records.service.CollectionService; +import org.fao.geonet.ogcapi.records.service.QueryBuilder; import org.fao.geonet.ogcapi.records.service.RecordService; import org.fao.geonet.ogcapi.records.util.MediaTypeUtil; import org.fao.geonet.ogcapi.records.util.RecordsEsQueryBuilder; @@ -87,6 +85,7 @@ @Slf4j(topic = "org.fao.geonet.ogcapi.records") public class ItemApiController { + public static final String EXCEPTION_COLLECTION_NOT_FOUND = 
"ogcapir.exception.collection.notFound"; public static final String EXCEPTION_COLLECTION_ITEM_NOT_FOUND = @@ -114,10 +113,11 @@ public class ItemApiController { DcatConverter dcatConverter; @Autowired RecordService recordService; + @Autowired + QueryBuilder queryBuilder; /** * Describe a collection item. - * */ @io.swagger.v3.oas.annotations.Operation( summary = "Describe a collection item.", @@ -143,7 +143,7 @@ public ResponseEntity collectionsCollectionIdItemsRecordIdGet( @ApiParam(value = "Identifier (name) of a specific collection", required = true) @PathVariable("collectionId") String collectionId, @ApiParam(value = "Identifier (name) of a specific record", required = true) - @PathVariable("recordId")String recordId, + @PathVariable("recordId") String recordId, @ApiIgnore HttpServletRequest request, @ApiIgnore HttpServletResponse response, @ApiIgnore Model model) { @@ -165,7 +165,6 @@ public ResponseEntity collectionsCollectionIdItemsRecordIdGet( MediaType mediaType = mediaTypeUtil.calculatePriorityMediaTypeFromRequest(request, allowedMediaTypes); - if (mediaType.equals(MediaType.APPLICATION_JSON) || mediaType.equals(GnMediaType.APPLICATION_GEOJSON)) { try { @@ -201,7 +200,6 @@ public ResponseEntity collectionsCollectionIdItemsRecordIdGet( /** * Describe the collection items. 
- * */ @io.swagger.v3.oas.annotations.Operation( summary = "Describe the collection items.", @@ -248,6 +246,8 @@ public ResponseEntity collectionsCollectionIdItemsGet( @ApiParam(value = "") @RequestParam(value = "externalids", required = false) List externalids, + @RequestParam(value = "ids", required = false) + List ids, @ApiParam(value = "") @RequestParam(value = "sortby", required = false) List sortby, @@ -255,6 +255,18 @@ public ResponseEntity collectionsCollectionIdItemsGet( @ApiIgnore HttpServletResponse response, @ApiIgnore Model model) throws Exception { + var query = queryBuilder.buildFromRequest(collectionId, + bbox, + datetime, + limit, + startindex, + type, + q, + ids, + externalids, + sortby, + request.getParameterMap() + ); List allowedMediaTypes = ListUtils.union(MediaTypeUtil.defaultSupportedMediaTypes, Arrays.asList( @@ -276,15 +288,14 @@ public ResponseEntity collectionsCollectionIdItemsGet( boolean allSourceFields = mediaType.equals(GnMediaType.APPLICATION_DCAT2_XML) - || mediaType.equals(GnMediaType.APPLICATION_RDF_XML); + || mediaType.equals(GnMediaType.APPLICATION_RDF_XML); return collectionsCollectionIdItemsGetInternal( - collectionId, bbox, datetime, limit, startindex, type, q, externalids, sortby, + query, request, response, allSourceFields); } else { - return collectionsCollectionIdItemsGetAsHtml(collectionId, bbox, datetime, limit, - startindex, type, q, externalids, sortby, request, response, model); + return collectionsCollectionIdItemsGetAsHtml(query, request, response, model); } } @@ -473,7 +484,7 @@ private ResponseEntity collectionsCollectionIdItemsRecordIdGetAsHtml( private List setDefaultRssSortBy(List sortby, HttpServletRequest request) { boolean isRss = "rss".equals(request.getParameter("f")) || (request.getHeader(HttpHeaders.ACCEPT) != null - && request.getHeader(HttpHeaders.ACCEPT).contains(MediaType.APPLICATION_RSS_XML_VALUE)); + && request.getHeader(HttpHeaders.ACCEPT).contains(MediaType.APPLICATION_RSS_XML_VALUE)); if 
(isRss && (sortby == null || sortby.isEmpty())) { sortby = new ArrayList<>(); @@ -484,18 +495,10 @@ private List setDefaultRssSortBy(List sortby, HttpServletRequest private String search( - String collectionId, - List bbox, - String datetime, - Integer limit, - Integer startindex, - String type, - List q, - List externalids, - List sortby, + Query requestQuery, HttpServletRequest request, boolean allSourceFields) { - Source source = collectionService.retrieveSourceForCollection(collectionId); + Source source = collectionService.retrieveSourceForCollection(requestQuery.getCollectionId()); if (source == null) { throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Unable to find collection"); @@ -503,8 +506,7 @@ private String search( String collectionFilter = collectionService.retrieveCollectionFilter(source, false); String query = recordsEsQueryBuilder - .buildQuery(q, externalids, bbox, - startindex, limit, collectionFilter, sortby, + .buildQuery(requestQuery, collectionFilter, allSourceFields ? Set.of("*") : null); try { return proxy.searchAndGetResult(request.getSession(), request, query, null); @@ -519,23 +521,14 @@ private String search( private ResponseEntity collectionsCollectionIdItemsGetInternal( - String collectionId, - List bbox, - String datetime, - Integer limit, - Integer startindex, - String type, - List q, - List externalids, - List sortby, + Query query, HttpServletRequest request, HttpServletResponse response, boolean allSourceFields) { - sortby = setDefaultRssSortBy(sortby, request); + query.setSortBy(setDefaultRssSortBy(query.getSortBy(), request)); - String queryResponse = search(collectionId, bbox, datetime, limit, startindex, type, q, - externalids, sortby, request, allSourceFields); + String queryResponse = search(query, request, allSourceFields); try { streamResult(response, queryResponse, getResponseContentType(request)); @@ -551,22 +544,14 @@ private ResponseEntity collectionsCollectionIdItemsGetInternal( * Collection items as HTML. 
*/ private ResponseEntity collectionsCollectionIdItemsGetAsHtml( - String collectionId, - List bbox, - String datetime, - Integer limit, - Integer startindex, - String type, - List q, - List externalids, - List sortby, + Query requestQuery, HttpServletRequest request, HttpServletResponse response, Model model) throws Exception { Locale locale = LocaleContextHolder.getLocale(); String language = locale.getISO3Language(); - Source source = collectionService.retrieveSourceForCollection(collectionId); + Source source = collectionService.retrieveSourceForCollection(requestQuery.getCollectionId()); if (source == null) { throw new ResponseStatusException(HttpStatus.NOT_FOUND, "Unable to find collection"); @@ -574,7 +559,7 @@ private ResponseEntity collectionsCollectionIdItemsGetAsHtml( String collectionFilter = collectionService.retrieveCollectionFilter(source, false); String query = recordsEsQueryBuilder - .buildQuery(q, externalids, bbox, startindex, limit, collectionFilter, sortby, null); + .buildQuery(requestQuery, collectionFilter, null); EsSearchResults results = new EsSearchResults(); try { @@ -588,10 +573,10 @@ private ResponseEntity collectionsCollectionIdItemsGetAsHtml( XsltModel modelSource = new XsltModel(); Map parameterMap = new HashMap<>(request.getParameterMap()); if (request.getParameter("limit") == null) { - parameterMap.put("limit", new String[]{limit + ""}); + parameterMap.put("limit", new String[]{requestQuery.getLimit() + ""}); } if (request.getParameter("startindex") == null) { - parameterMap.put("startindex", new String[]{startindex + ""}); + parameterMap.put("startindex", new String[]{requestQuery.getStartIndex() + ""}); } modelSource.setRequestParameters(parameterMap); modelSource.setCollection(source); diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/Query.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/Query.java new file mode 100644 index 
00000000..13c80094 --- /dev/null +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/Query.java @@ -0,0 +1,241 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + +package org.fao.geonet.ogcapi.records.controller; + +import java.util.List; +import java.util.Map; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; + +/** + * Represents an ogcapi records query. + * + *

see https://docs.ogc.org/DRAFTS/20-004.htm + */ +@XmlAccessorType(XmlAccessType.FIELD) +public class Query { + + /** + * A bounding box. If the spatial extent of the record intersects the specified bounding box, then + * the record shall be presented in the response document. + * + *

The bounding box SHALL be provided as four or six numbers, depending on whether the + * coordinate reference system includes a vertical axis (height or depth). + * + *

If the bounding box consists of four numbers, the coordinate reference system of the + * values SHALL be interpreted as WGS 84 longitude/latitude (http://www.opengis.net/def/crs/OGC/1.3/CRS84) + * unless a different coordinate reference system is specified in a parameter bbox-crs. + * + *

If the bounding box consists of six numbers, the coordinate reference system of the values + * SHALL be interpreted as WGS 84 longitude/latitude/ellipsoidal height + * (http://www.opengis.net/def/crs/OGC/0/CRS84h) unless a different coordinate reference system is + * specified in a parameter bbox-crs. + */ + public List bbox; + + /** + * CRS of the bbox. + * + *

NOTE: spec does NOT define this field, but it is implied (cf bbox). + */ + public String bboxCrs; + + + /** + * A time instance or time period. If the temporal extent of the record intersects the specified + * date/time value, then the record shall be presented in the response document. + */ + public String datetime; + + /** + * The number of records to be presented in a response document. + * + *

1-10,000 default=10 (spec says this may change) + * + *

If the value of the limit parameter is larger than the maximum value, this SHALL NOT + * result in an error (instead use the maximum as the parameter value). + * + *

Only items are counted that are on the first level of the collection. + * Any nested objects contained within the explicitly requested items SHALL not be counted. + */ + public Integer limit = 10; + + /** + * This is not in the specification, however, it's implied in the examples and needed for paging. + */ + public Integer startIndex = 0; + + /** + * A comma-separated list of search terms. If any server-chosen text field in the record contains + * 1 or more of the terms listed, then this record shall appear in the response set. + * + *

Search terms that may appear together (logical OR) in a record SHALL be separated by + * literal commas. + * + *

Search terms that must appear together, and in the order specified, in a record SHALL be + * separated by one or more white space characters. + * + *

Keyword searches using the q parameter SHALL be case insensitive. + * + *

The specific set of text keys/fields/properties of a record to which the q operator is + * applied SHALL be left to the discretion of the implementation. + */ + @SuppressWarnings("checkstyle:membername") //single letter var name defined by OGCAPI query + public List q; + + /** + * An equality predicate consisting of a comma-separated list of resource types. Only records of + * the listed type shall appear in the response set. + * + *

The definition of the type parameter SHOULD be extended to enumerate the list of known + * record or resource types. + */ + public String type; + + + /** + * An equality predicate consisting of a comma-separated list of record identifiers. Only records + * with the specified identifiers shall appear in the response set. + */ + public List ids; + + /** + * An equality predicate consisting of a comma-separated list of external resource identifiers. + * Only records with the specified external identifiers shall appear in the response set. + * + *

pattern: ([^:]+:)?[^:]+ + * + *

If the search value is qualified with a scheme then both the scheme and the value of the + * record’s external identifier SHALL match in order for the record to be in the result set. + * + *

If the search value is not qualified with a scheme then only the value of the record’s + * external identifier SHALL match in order for the record to be in the result set. + * + *

If the search value is only the scheme then the scheme component of the record’s external + * identifier SHALL match in order for the record to be in the result set. + */ + public List externalIds; + + /** + * Equality predicates with any queryable not already listed in this table. + * + *

Implied is that the keys MUST BE listed in the Queryables + */ + public Map propValues; + + + /** + * This is NOT part of the ogcapi query specification. But, it is implied because you must do a + * query against a collection. + */ + public String collectionId; + + /** + * How to order the results. + * + *

This is not in the query spec - cf. sorting + */ + public List sortBy; + + //--------------------------------------------------------- + + + public List getBbox() { + return bbox; + } + + public void setBbox(List bbox) { + this.bbox = bbox; + } + + public String getBboxCrs() { + return bboxCrs; + } + + public void setBboxCrs(String bboxCrs) { + this.bboxCrs = bboxCrs; + } + + public String getDatetime() { + return datetime; + } + + public void setDatetime(String datetime) { + this.datetime = datetime; + } + + public Integer getLimit() { + return limit; + } + + public void setLimit(Integer limit) { + this.limit = limit; + } + + public Integer getStartIndex() { + return startIndex; + } + + public void setStartIndex(Integer startIndex) { + this.startIndex = startIndex; + } + + public List getQ() { + return q; + } + + public void setQ(List q) { + this.q = q; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public List getIds() { + return ids; + } + + public void setIds(List ids) { + this.ids = ids; + } + + public List getExternalIds() { + return externalIds; + } + + public void setExternalIds(List externalIds) { + this.externalIds = externalIds; + } + + public Map getPropValues() { + return propValues; + } + + public void setPropValues(Map propValues) { + this.propValues = propValues; + } + + public String getCollectionId() { + return collectionId; + } + + public void setCollectionId(String collectionId) { + this.collectionId = collectionId; + } + + public List getSortBy() { + return sortBy; + } + + public void setSortBy(List sortBy) { + this.sortBy = sortBy; + } +} diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/QueryableApiController.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/QueryableApiController.java index 7accc200..b1157c68 100644 --- 
a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/QueryableApiController.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/controller/QueryableApiController.java @@ -1,3 +1,8 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + package org.fao.geonet.ogcapi.records.controller; import io.swagger.annotations.Api; @@ -8,6 +13,7 @@ import javax.servlet.http.HttpServletResponse; import lombok.extern.slf4j.Slf4j; import org.fao.geonet.ogcapi.records.model.JsonSchema; +import org.fao.geonet.ogcapi.records.service.QueryToElastic; import org.fao.geonet.ogcapi.records.service.QueryablesService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpStatus; @@ -33,6 +39,9 @@ public class QueryableApiController { @Autowired QueryablesService queryablesService; + @Autowired + QueryToElastic queryToElastic; + /** * Describe queryables for a collection. */ @@ -57,7 +66,6 @@ public ResponseEntity queryablesForCollection( @ApiIgnore Model model) throws Exception { var jsonSchema = queryablesService.buildQueryables(collectionId); - return ResponseEntity.ok(jsonSchema); } } diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/GnElasticInfo.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/GnElasticInfo.java new file mode 100644 index 00000000..ae08ea67 --- /dev/null +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/GnElasticInfo.java @@ -0,0 +1,148 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. 
+ */ + +package org.fao.geonet.ogcapi.records.model; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.JsonInclude.Include; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElementWrapper; + +/** + * GN Elastic JSON index metadata. This describes some elastic index JSON paths and how to use them + * for searching for a Queryable. + * + *

Consult your local elastic index for how it's built - http://localhost:9200/gn-records + * + *

A Queryable can refer to multiple columns. + * + *

Examples: + * 1. resourceTitleObject This has a bunch of sub-components. For example, "default", "langeng", + * "langdut". 2. + * + *

Also, note VERY BRIEFLY, elastic has 2 types of "columns" - keyword and text. text can + * be searched generally (i.e. fuzzy full text search), while keywords must be EQUAL (including case + * and must be a full, exact, match). + * + *

Elastic indexing is very rich - read the elastic documentation and consult the actual + * gn-records elastic index. + */ +@XmlAccessorType(XmlAccessType.FIELD) +public class GnElasticInfo { + + /** + * a JSON path that contains the queryable's info. + * + *

note - you can use ${lang3iso} in this path to define a language specific property. + * + *

i.e. "resourceTitleObject.lang${lang3iso}" would be converted to something like + * "resourceTitleObject.langeng" or "resourceTitleObject.langdut" etc... + */ + @JsonInclude(Include.NON_EMPTY) + @XmlElementWrapper(name = "elasticPath") + @XmlElement(name = "elasticPath") + private String elasticPath; + /** + * how is that location indexed? This is useful for hints to the UI. + * + *

TEXT - can do full text search. KEYWORD - must be exact match (with some fuzziness). + */ + @JsonInclude(Include.NON_EMPTY) + @XmlElementWrapper(name = "elasticColumnType") + @XmlElement(name = "elasticColumnType") + private ElasticColumnType elasticColumnType; + /** + * how is that location indexed? This is useful for hints to the UI. + * + *

LOWERCASE = used the lowercase normalizer + */ + @JsonInclude(Include.NON_EMPTY) + @XmlElementWrapper(name = "elasticColumnFormat") + @XmlElement(name = "elasticColumnFormat") + private ElasticColumnFormat elasticColumnFormat; + /** + * What type of query to use? + * + *

MULTIMATCH - use an elastic `multi_match` (recommended for everything) + * + *

NESTED - some object trees prefer elastic `nested` searchs. "contacts" prefers this. + */ + @JsonInclude(Include.NON_EMPTY) + @XmlElementWrapper(name = "elasticColumnFormat") + @XmlElement(name = "elasticQueryType") + private ElasticQueryType elasticQueryType = ElasticQueryType.MULTIMATCH; + + public GnElasticInfo(String elasticPath, ElasticColumnType elasticColumnType) { + this(elasticPath, elasticColumnType, ElasticColumnFormat.NORMAL); + } + + public GnElasticInfo() { + } + + + /** + * build a gnElasticInfo with the main fields set. + * @param elasticPath Elastic JSON path + * @param elasticColumnType what data type is this? + * @param format Metadata for GUIs. + */ + public GnElasticInfo(String elasticPath, + ElasticColumnType elasticColumnType, + ElasticColumnFormat format) { + this.elasticPath = elasticPath; + this.elasticColumnType = elasticColumnType; + this.elasticColumnFormat = format; + } + + //------------------------------------- + + public ElasticQueryType getElasticQueryType() { + return elasticQueryType; + } + + public void setElasticQueryType(ElasticQueryType elasticQueryType) { + this.elasticQueryType = elasticQueryType; + } + + public String getElasticPath() { + return elasticPath; + } + + //------------------------------------- + + public void setElasticPath(String elasticPath) { + this.elasticPath = elasticPath; + } + + public ElasticColumnType getElasticColumnType() { + return elasticColumnType; + } + + public void setElasticColumnType(ElasticColumnType elasticColumnType) { + this.elasticColumnType = elasticColumnType; + } + + public ElasticColumnFormat getElasticColumnFormat() { + return elasticColumnFormat; + } + + public void setElasticColumnFormat(ElasticColumnFormat elasticColumnFormat) { + this.elasticColumnFormat = elasticColumnFormat; + } + + public enum ElasticColumnType { + KEYWORD, TEXT, OTHER, DATE, GEO, DATERANGE + } + + public enum ElasticColumnFormat { + NORMAL, LOWERCASE + } + + public enum ElasticQueryType { + NESTED, 
MULTIMATCH + } +} diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonItem.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonItem.java index 4edbdf81..6fd213b5 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonItem.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonItem.java @@ -1,3 +1,8 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + package org.fao.geonet.ogcapi.records.model; import com.fasterxml.jackson.annotation.JsonInclude; diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonProperty.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonProperty.java index 0aa2f962..754aaebb 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonProperty.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonProperty.java @@ -1,8 +1,17 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + package org.fao.geonet.ogcapi.records.model; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonInclude.Include; +import java.util.ArrayList; import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlElementWrapper; @@ -15,6 +24,7 @@ * *

THIS IS SIMPLIFIED - SEE FULL SPECIFICATION and JsonItem */ +@XmlAccessorType(XmlAccessType.FIELD) public class JsonProperty { public static final String TypeString = "string"; @@ -66,10 +76,23 @@ public class JsonProperty { * cf. https://docs.ogc.org/is/19-079r2/19-079r2.html. */ @JsonInclude(Include.NON_EMPTY) - @XmlElementWrapper(name = "x-ogc-role") @XmlElement(name = "x-ogc-role") - @com.fasterxml.jackson.annotation.JsonProperty("x-ogc-role") - public String xxOgcRole; + @com.fasterxml.jackson.annotation.JsonProperty(value = "x-ogc-role") + @org.codehaus.jackson.annotate.JsonProperty(value = "x-ogc-role") + @SuppressWarnings("checkstyle:membername") //defined by ogc, starts with single "x" + public String xOgcRole; + + /** + * Where this is in the GeoNetwork Elastic Index -> path to data. + * + *

List of the places in the elastic index to search for the queryable. + */ + @JsonInclude(Include.NON_EMPTY) + @XmlElement(name = "x-gn-elastic") + @com.fasterxml.jackson.annotation.JsonProperty(value = "x-gn-elastic") + @org.codehaus.jackson.annotate.JsonProperty(value = "x-gn-elastic") + @SuppressWarnings("checkstyle:membername") // starts with single "x" + public List xGnElastic; /** * cf. https://docs.ogc.org/is/19-079r2/19-079r2.html. @@ -84,14 +107,18 @@ public class JsonProperty { /** * builds a minimal JsonProperty (part of json schema). * - * @param type type of the property - * @param title title of the property + * @param type type of the property + * @param title title of the property * @param description description of the property */ public JsonProperty(String type, String title, String description) { this.type = type; this.title = title; this.description = description; + this.xGnElastic = new ArrayList<>(); + } + + public JsonProperty() { } //---------------------------------- @@ -137,12 +164,14 @@ public void setEnum(List enumeration) { this.enumeration = enumeration; } + public String getxOgcRole() { - return xxOgcRole; + return xOgcRole; } + @JsonIgnore public void setxOgcRole(String xxOgcRole) { - this.xxOgcRole = xxOgcRole; + this.xOgcRole = xxOgcRole; } public JsonItem getItems() { @@ -152,4 +181,14 @@ public JsonItem getItems() { public void setItems(JsonItem items) { this.items = items; } + + + public List getxGnElasticPath() { + return xGnElastic; + } + + @JsonIgnore + public void setxGnElasticPath(List xxGnElasticPath) { + this.xGnElastic = xxGnElasticPath; + } } diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonSchema.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonSchema.java index fad8602f..6f263f90 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonSchema.java +++ 
b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/model/JsonSchema.java @@ -1,3 +1,8 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + package org.fao.geonet.ogcapi.records.model; import com.fasterxml.jackson.annotation.JsonInclude; @@ -19,7 +24,8 @@ public class JsonSchema { @JsonInclude(Include.NON_EMPTY) @XmlElementWrapper(name = "$schema") @XmlElement(name = "$schema") - @com.fasterxml.jackson.annotation.JsonProperty("$schema") + @com.fasterxml.jackson.annotation.JsonProperty(value = "$schema") + @org.codehaus.jackson.annotate.JsonProperty(value = "$schema") public String schema = "https://json-schema.org/draft/2020-12/schema"; /** diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryBuilder.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryBuilder.java new file mode 100644 index 00000000..745c955f --- /dev/null +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryBuilder.java @@ -0,0 +1,115 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. + */ + +package org.fao.geonet.ogcapi.records.service; + + +import java.math.BigDecimal; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.fao.geonet.ogcapi.records.controller.Query; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +/** + * Service for Query Processing. + * + *

Build a Query from a request + * + *

Handle the "extra parameters" (queryables) + * + *

Help Convert between OGCAPI Query and Elastic + */ +@Service +@Slf4j(topic = "org.fao.geonet.ogcapi.records") +public class QueryBuilder { + + @Autowired + QueryablesService queryablesService; + + + /** + * builds a query from the request - cf. ItemApiController#collectionsCollectionIdItemsGet. + * + * @param collectionId see {@link Query#collectionId} + * @param bbox see {@link Query#bbox} + * @param datetime see {@link Query#datetime} + * @param limit see {@link Query#limit} + * @param startindex see {@link Query#startIndex} + * @param type see {@link Query#type} + * @param q see {@link Query#q} + * @param ids see {@link Query#ids} + * @param externalids see {@link Query#externalIds} + * @param sortby see {@link Query#sortBy} + * @param parameterMap see {@link Query#propValues} + * @return query object filled in + */ + public Query buildFromRequest(String collectionId, + List bbox, + String datetime, + Integer limit, + Integer startindex, + String type, + List q, + List ids, + List externalids, + List sortby, + Map parameterMap) { + + var result = new Query(); + + result.setCollectionId(collectionId); + + if (bbox != null && bbox.size() > 0) { + result.setBbox(bbox.stream().map(x -> x.doubleValue()).collect(Collectors.toList())); + } + + result.setLimit(limit); + result.setStartIndex(startindex); + result.setDatetime(datetime); + result.setType(type); + result.setQ(q); + result.setIds(ids); + + result.setExternalIds(externalids); + result.setSortBy(sortby); + + injectExtraFromRequest(collectionId, result, parameterMap); + + return result; + } + + + /** + * handle the "other" Queryables (cf Queryables). 
+ * + * @param result existing query to augment + * @param parameterMap parameters from the request query string + */ + private void injectExtraFromRequest(String collectionId, Query result, + Map parameterMap) { + var queryables = queryablesService.buildQueryables(collectionId) + .getProperties(); + + result.setPropValues(new LinkedHashMap<>()); + + //foreach key-value pair in the parameter map + for (var param : parameterMap.entrySet()) { + var queryable = queryables.get(param.getKey()); + if (queryable != null) { + //we found a param (key-value) in the request that matches one of our queryables + var values = param.getValue(); + if (values == null || values.length == 0) { + continue; + } + result.getPropValues().put(param.getKey(), values[0]); + } + } + } + +} diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryToElastic.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryToElastic.java new file mode 100644 index 00000000..661b3e1b --- /dev/null +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryToElastic.java @@ -0,0 +1,356 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. 
+ */ + +package org.fao.geonet.ogcapi.records.service; + +import static org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnType.DATE; +import static org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnType.DATERANGE; +import static org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnType.GEO; + +import java.io.IOException; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; +import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.common.geo.ShapeRelation; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.geometry.Rectangle; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.NestedQueryBuilder; +import org.elasticsearch.index.query.Operator; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.fao.geonet.ogcapi.records.controller.Query; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnType; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticQueryType; +import org.fao.geonet.ogcapi.records.model.JsonProperty; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +/** + * This helps to build the "extra" (&property=value) queryables in the OGCAPI search to an Elastic + * Index query. + * + * + *

see the documentation in "queryables.json", which goes into more depth. + * + *

This class uses the metadata in the "queryables.json" to determine how to make the elastic
+ * query.
+ */
+@Service
+@Slf4j(topic = "org.fao.geonet.ogcapi.records")
+public class QueryToElastic {
+
+  @Autowired
+  QueryablesService queryablesService;
+
+  /**
+   * Given an already setup SearchSourceBuilder, add more queries to it
+   * for any of the request {@code &param=search-Text}.
+   * WHERE: param is a queryable (cf queryables.json).
+   *
+   * @param sourceBuilder mostly configured elastic query (its main query must be a bool query).
+   * @param query information from the search request.
+   */
+  public void augmentWithQueryables(SearchSourceBuilder sourceBuilder,
+      Query query) {
+    var mainQuery = sourceBuilder.query();
+    if (query.getPropValues() == null || query.getPropValues().isEmpty()) {
+      return; //no queryable parameters in the request - leave the query untouched
+    }
+
+    if (!(mainQuery instanceof BoolQueryBuilder)) {
+      //not expected - RecordsEsQueryBuilder#buildQuery installs a BoolQueryBuilder first
+      throw new IllegalStateException("main query is not a bool query: " + mainQuery);
+    }
+    var boolQuery = (BoolQueryBuilder) mainQuery;
+
+    var jsonSchema = queryablesService.buildQueryables(query.getCollectionId());
+
+    for (var prop : query.getPropValues().entrySet()) {
+      var jsonProp = jsonSchema.getProperties().get(prop.getKey());
+      var searchVal = prop.getValue();
+      var q = create(jsonProp, searchVal, "*"); //"*" turns "lang${lang3iso}" paths into "lang*"
+      if (q != null) {
+        boolQuery.must(q); //every queryable is AND-ed onto the main query
+      }
+    }
+  }
+
+  /**
+   * Given a JsonProperty, construct the appropriate Elastic Query.
+   * Handles Geo, Nested, Time, and text searches.
+   *
+   * @param jsonProperty property to process.
+   * @param userSearchTerm what user is searching for
+   * @param lang3iso what language?
+   * @return QueryBuilder for this particular queryable.
+   */
+  public QueryBuilder create(JsonProperty jsonProperty, String userSearchTerm, String lang3iso) {
+
+    var isGeo = jsonProperty.getxGnElasticPath().stream()
+        .anyMatch(x -> x.getElasticColumnType() == GEO);
+
+    var isNested = jsonProperty.getxGnElasticPath().stream()
+        .anyMatch(x -> x.getElasticQueryType() == ElasticQueryType.NESTED);
+
+    var isTime = (jsonProperty.getxGnElasticPath().get(0).getElasticColumnType() == DATE)
+        || (jsonProperty.getxGnElasticPath().get(0).getElasticColumnType() == DATERANGE);
+
+    if (isTime) { //date/daterange: only the first configured path is queried
+      return createVsDate(jsonProperty.getxGnElasticPath().get(0), userSearchTerm, lang3iso);
+    }
+
+    if (isGeo) { //geo: only the first configured path is queried
+      return createGeo(jsonProperty.getxGnElasticPath().get(0), userSearchTerm, lang3iso);
+    }
+
+    if (isNested) { //any path marked NESTED -> all paths are queried as nested
+      return createNested(jsonProperty.getxGnElasticPath(), userSearchTerm, lang3iso);
+    }
+
+    return createMulti(jsonProperty.getxGnElasticPath(), userSearchTerm, lang3iso);
+  }
+
+  private QueryBuilder create(GnElasticInfo gnElasticPath, String userSearchTerm, String lang3iso) {
+    if (gnElasticPath.getElasticColumnType() == ElasticColumnType.TEXT
+        && StringUtils.countMatches(gnElasticPath.getElasticPath(), ".") > 2) {
+      return createNested(gnElasticPath, userSearchTerm, lang3iso); //TEXT, more than 2 dots
+    }
+    if (gnElasticPath.getElasticColumnType() == ElasticColumnType.TEXT
+        && StringUtils.countMatches(gnElasticPath.getElasticPath(), ".") <= 2) {
+      return createMatch(gnElasticPath, userSearchTerm, lang3iso); //TEXT, at most 2 dots
+    }
+    if (gnElasticPath.getElasticColumnType() == ElasticColumnType.KEYWORD) {
+      return createTerm(gnElasticPath, userSearchTerm, lang3iso);
+    }
+    return null; //other column types are not handled by this overload
+  }
+
+  /**
+   * create a date range query.
+   * @param gnElasticInfo column (singular) you are querying
+   * @param userSearchTerm what searching for? (this will be parsed to an interval)
+   * @param lang3iso language
+   * @return QueryBuilder with a date-based range query.
+ */ + private QueryBuilder createVsDate(GnElasticInfo gnElasticInfo, String userSearchTerm, + String lang3iso) { + var result = QueryBuilders.rangeQuery(gnElasticInfo.getElasticPath()); + + processDateRequest(result, userSearchTerm); + + return result; + } + + + /** + * Format. + * + *

interval-bounded = date-time "/" date-time interval-half-bounded-start = [".."] "/" + * date-time interval-half-bounded-end = date-time "/" [".."] interval = + * interval-closed / interval-half-bounded-start / interval-half-bounded-end + * datetime = date-time / interval + * + *

The syntax of date-time is specified by RFC 3339, 5.6.
+   * https://www.rfc-editor.org/rfc/rfc3339.html#section-5.6
+   *
+   * @param result RangeQueryBuilder to update with start/end (might only have start or end if "..")
+   * @param userSearchTerm date or interval to parse.
+   */
+  private void processDateRequest(RangeQueryBuilder result, String userSearchTerm) {
+    if (!userSearchTerm.contains("/")) {
+      //its a single date (request) vs a date (elastic index)
+      result.relation(ShapeRelation.INTERSECTS.getRelationName());
+      result.gte(userSearchTerm);
+      result.lte(userSearchTerm);
+      return;
+    }
+    //interval
+    var dateParts = userSearchTerm.split("/");
+    if (dateParts.length != 2) {
+      return; //error! - no bounds are set on the range query
+    }
+
+    result.relation(ShapeRelation.INTERSECTS.getRelationName());
+    if (!dateParts[0].equals("..")) { //".." = open start
+      result.gte(dateParts[0]);
+    }
+    if (!dateParts[1].equals("..")) { //".." = open end
+      result.lte(dateParts[1]);
+    }
+  }
+
+  private QueryBuilder createGeo(GnElasticInfo gnElasticInfo, String userSearchTerm,
+      String lang3iso) {
+
+    var nums = userSearchTerm.split(","); //expects "minx,miny,maxx,maxy"
+    if (nums.length != 4) {
+      return null;
+    }
+
+    Rectangle rectangle = new Rectangle( //same argument order as RecordsEsQueryBuilder's bbox
+        Double.parseDouble(nums[0]), //minX
+        Double.parseDouble(nums[2]), //maxX
+        Double.parseDouble(nums[3]), //maxY
+        Double.parseDouble(nums[1])); //minY
+
+    try {
+      var geoQuery = QueryBuilders
+          .geoShapeQuery(gnElasticInfo.getElasticPath(), rectangle)
+          .relation(ShapeRelation.getRelationByName("intersects"));
+      return geoQuery;
+    } catch (IOException e) {
+      log.error("could not build geo query for {}", gnElasticInfo.getElasticPath(), e);
+    }
+    return null;
+  }
+
+  /**
+   * convert a JsonProperty to an elastic QUERY.
+ * + * @param jsonProperty property + * @return String representing the needed elastic query (or null if not possible) + */ + public QueryBuilder convert(JsonProperty jsonProperty, String userSearchTerm, String lang3iso) { + var gnElasticPath = jsonProperty.getxGnElasticPath(); + if (gnElasticPath == null || gnElasticPath.size() == 0) { + return null; + } + if (gnElasticPath.size() > 1) { + return createOR(gnElasticPath, userSearchTerm, lang3iso); + } + return create(gnElasticPath.get(0), userSearchTerm, lang3iso); + } + + /** + * create an elastic nested query. The main path will be the 1st path part of the full path. + * i.e. "contacts.phone" -> main path will be "contacts". + * + * @param columns what columns are we searching in. + * @param userSearchTerm what is the userer searching for + * @param lang3iso language + * @return QueryBuilder (could be a nested, or an OR ("bool" "should" query) with multiple nested) + */ + public QueryBuilder createNested(List columns, + String userSearchTerm, + String lang3iso) { + + if (columns.size() == 1) { + return createNested(columns.get(0), userSearchTerm, lang3iso); + } + + var result = QueryBuilders.boolQuery(); + + for (var gnElasticInfo : columns) { + var subQ = createNested(gnElasticInfo, userSearchTerm, lang3iso); + result.should(subQ); + } + + result.minimumShouldMatch(1); + + return result; + } + + private QueryBuilder createNested(GnElasticInfo gnElasticPath, + String userSearchTerm, + String lang3iso) { + userSearchTerm = userSearchTerm.replaceAll("\"", ""); + var path = gnElasticPath.getElasticPath(); + path = path.replace("${lang3iso}", lang3iso); + var firstPartPath = path.substring(0, path.indexOf(".")); + + var matchQueryBuilder = QueryBuilders.matchQuery(path, userSearchTerm); + + NestedQueryBuilder nestedQueryBuilder = QueryBuilders.nestedQuery(firstPartPath, + matchQueryBuilder, ScoreMode.Max); + nestedQueryBuilder.ignoreUnmapped(true); + + return nestedQueryBuilder; + } + + + /** + * Create a 
match_multi elastic query for the given columns. + * @param columns which columns? (can contain "*") + * @param userSearchTerm what text searching for? + * @param lang3iso language to inject as "$P{iso3lang} + * @return QueryBuilder with a match_multi + */ + public QueryBuilder createMulti(List columns, + String userSearchTerm, + String lang3iso) { + userSearchTerm = userSearchTerm.replaceAll("\"", ""); + + var paths = columns.stream() + .map(x -> x.getElasticPath()) + .map(x -> x.replace("${lang3iso}", lang3iso)) + .toArray(String[]::new); + + var result = QueryBuilders.multiMatchQuery(userSearchTerm, paths); + //try to match for minor typos + result.fuzziness(Fuzziness.AUTO); + result.fuzzyTranspositions(true); + result.lenient(true); + result.minimumShouldMatch("1"); + result.operator(Operator.AND); //capital of canada -> capital AND of AND canada + return result; + } + + + + + private QueryBuilder createMatch(GnElasticInfo gnElasticPath, String userSearchTerm, + String lang3iso) { + userSearchTerm = userSearchTerm.replaceAll("\"", ""); + var path = gnElasticPath.getElasticPath(); + path = path.replace("${lang3iso}", lang3iso); + + if (path.contains("*")) { + var queryBuilder = QueryBuilders.multiMatchQuery(userSearchTerm, path); + return queryBuilder; + } + + var queryBuilder = QueryBuilders.matchQuery(path, userSearchTerm); + return queryBuilder; + + } + + private QueryBuilder createTerm(GnElasticInfo gnElasticPath, String userSearchTerm, + String lang3iso) { + userSearchTerm = userSearchTerm.replaceAll("\"", ""); + var path = gnElasticPath.getElasticPath(); + path = path.replace("${lang3iso}", lang3iso); + var queryBuilder = QueryBuilders.termsQuery(path, userSearchTerm); + return queryBuilder; + } + + + /** + * elastic "SHOULD" is an "OR". 
+ * + * @param gnElasticPaths set of queries to OR ("should") together + * @return String representing the needed elastic query (or null if not possible) + */ + private QueryBuilder createOR(List gnElasticPaths, + String userSearchTerm, + String lang3iso) { + + var boolQuerybuilder = QueryBuilders.boolQuery(); + boolQuerybuilder.minimumShouldMatch(1); + + for (var gnElasticInfo : gnElasticPaths) { + var orItem = create(gnElasticInfo, userSearchTerm, lang3iso); + if (orItem != null) { + boolQuerybuilder.should(orItem); + } + } + return boolQuerybuilder; + } + +} diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryablesService.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryablesService.java index 44e40fb1..e1f574a4 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryablesService.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/service/QueryablesService.java @@ -1,8 +1,14 @@ package org.fao.geonet.ogcapi.records.service; -import java.util.LinkedHashMap; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.util.Map; import lombok.extern.slf4j.Slf4j; +import org.codehaus.jackson.map.ObjectMapper; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnFormat; +import org.fao.geonet.ogcapi.records.model.GnElasticInfo.ElasticColumnType; import org.fao.geonet.ogcapi.records.model.JsonProperty; import org.fao.geonet.ogcapi.records.model.JsonSchema; import org.springframework.stereotype.Service; @@ -23,13 +29,21 @@ public class QueryablesService { * @return schema based on collection */ public JsonSchema buildQueryables(String collectionId) { - var jsonSchema = new JsonSchema(); - Map properties = new LinkedHashMap<>(); - 
jsonSchema.setProperties(properties); - addStandardProperties(properties); + InputStream is = getClass().getClassLoader().getResourceAsStream("queryables/queryables.json"); - return jsonSchema; + try { + String text = new String(is.readAllBytes(), StandardCharsets.UTF_8); + text = text.replaceAll("(?m)^//.*", ""); + + var objectMapper = new ObjectMapper(); + var result = objectMapper.readValue(text, JsonSchema.class); + return result; + } catch (IOException e) { + e.printStackTrace(); + } + + return null; } /** @@ -46,14 +60,19 @@ public void addStandardProperties(Map properties) { p = new JsonProperty(JsonProperty.TypeString, "id", "A unique record identifier assigned by the server."); p.setxOgcRole("id"); + p.getxGnElasticPath().add(new GnElasticInfo("uuid", ElasticColumnType.KEYWORD)); properties.put("id", p); p = new JsonProperty(JsonProperty.TypeString, "created", "The date this record was created in the server."); + p.getxGnElasticPath().add( + new GnElasticInfo("createdDate", ElasticColumnType.DATE, ElasticColumnFormat.NORMAL)); properties.put("created", p); p = new JsonProperty(JsonProperty.TypeString, "updated", "The most recent date on which the record was changed."); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceDate.date", ElasticColumnType.DATE, ElasticColumnFormat.NORMAL)); properties.put("updated", p); //conformsTo -- not in Elastic Index JSON @@ -61,10 +80,14 @@ public void addStandardProperties(Map properties) { p = new JsonProperty(JsonProperty.TypeString, "language", "The language used for textual values (i.e. 
titles, descriptions, etc.)" + " of this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("mainLanguage", ElasticColumnType.KEYWORD)); properties.put("language", p); p = new JsonProperty(JsonProperty.TypeString, "languages", "The list of other languages in which this record is available."); + p.getxGnElasticPath().add( + new GnElasticInfo("otherLanguage", ElasticColumnType.KEYWORD)); properties.put("languages", p); //links -- not in Elastic Index JSON @@ -73,52 +96,89 @@ public void addStandardProperties(Map properties) { //table 9 //unclear what this maps to in elastic - p = new JsonProperty(JsonProperty.TypeString, "title", + p = new JsonProperty(JsonProperty.TypeString, "type", "The nature or genre of the resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceType", ElasticColumnType.KEYWORD)); properties.put("type", p); p = new JsonProperty(JsonProperty.TypeString, "title", "A human-readable name given to the resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceTitleObject.default", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceTitleObject.lang${lang3iso}", ElasticColumnType.TEXT)); properties.put("title", p); p = new JsonProperty(JsonProperty.TypeString, "description", "A free-text description of the resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceAbstractObject.default", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceAbstractObject.lang${lang3iso}", ElasticColumnType.TEXT)); properties.put("description", p); p = new JsonProperty(JsonProperty.TypeString, "geometry", "A spatial extent associated with the resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("geom", ElasticColumnType.GEO)); properties.put("geometry", p); p = new JsonProperty(JsonProperty.TypeString, "time", "A temporal extent associated with the 
resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("resourceTemporalDateRange", ElasticColumnType.DATERANGE)); properties.put("time", p); p = new JsonProperty(JsonProperty.TypeString, "keywords", "A list of free-form keywords or tags associated with the resource" + " described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("tag.default", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("tag.lang${lang3iso}", ElasticColumnType.TEXT)); properties.put("keywords", p); p = new JsonProperty(JsonProperty.TypeString, "themes", "A knowledge organization system used to classify the resource" + " described by this resource."); + // looks like this index is disabled + p.getxGnElasticPath().add( + new GnElasticInfo("allKeywords.*.keywords.default", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("allKeywords.*.keywords.lang${lang3iso}", ElasticColumnType.TEXT)); properties.put("themes", p); p = new JsonProperty(JsonProperty.TypeString, "contacts", "A list of contacts qualified by their role(s) in association to the record" + " or the resource described by this record."); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.organisationObject.default", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.organisationObject.lang${lang3iso}", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.address", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.role", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.email", ElasticColumnType.KEYWORD)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.website", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.individual", ElasticColumnType.TEXT)); + p.getxGnElasticPath().add( + new GnElasticInfo("contact.position", ElasticColumnType.TEXT)); + 
p.getxGnElasticPath().add( + new GnElasticInfo("contact.phone", ElasticColumnType.TEXT)); properties.put("contacts", p); //resourceLanguages -- not in Elastic Index JSON //externalIds -- not in Elastic Index JSON //formats -- not in Elastic Index JSON - p = new JsonProperty(JsonProperty.TypeString, "license", - "The legal provisions under which the resource described by this record" - + " is made available."); - properties.put("license", p); + //license isn't really easy to search since the iso and ogcapi define it differently //rights -- not in Elastic Index JSON - } } diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/ElasticIndexJson2CollectionInfo.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/ElasticIndexJson2CollectionInfo.java index 74895d34..9da25ce3 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/ElasticIndexJson2CollectionInfo.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/ElasticIndexJson2CollectionInfo.java @@ -1,3 +1,8 @@ +/** + * (c) 2024 Open Source Geospatial Foundation - all rights reserved This code is licensed under the + * GPL 2.0 license, available at the root application directory. 
+ */ + package org.fao.geonet.ogcapi.records.util; import static org.fao.geonet.ogcapi.records.util.JsonUtils.getLangString; diff --git a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/RecordsEsQueryBuilder.java b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/RecordsEsQueryBuilder.java index f8a86621..6ec63872 100644 --- a/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/RecordsEsQueryBuilder.java +++ b/modules/services/ogc-api-records/src/main/java/org/fao/geonet/ogcapi/records/util/RecordsEsQueryBuilder.java @@ -1,7 +1,6 @@ package org.fao.geonet.ogcapi.records.util; import java.io.IOException; -import java.math.BigDecimal; import java.util.Arrays; import java.util.HashSet; import java.util.List; @@ -22,6 +21,8 @@ import org.elasticsearch.search.sort.SortOrder; import org.fao.geonet.common.search.SearchConfiguration; import org.fao.geonet.index.model.gn.IndexRecordFieldNames; +import org.fao.geonet.ogcapi.records.controller.Query; +import org.fao.geonet.ogcapi.records.service.QueryToElastic; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.context.properties.ConstructorBinding; import org.springframework.stereotype.Component; @@ -31,11 +32,9 @@ @Slf4j(topic = "org.fao.geonet.ogcapi") public class RecordsEsQueryBuilder { - @Autowired - private SearchConfiguration configuration; - + private static final String SORT_BY_SEPARATOR = ","; // TODO: Sources depends on output type - private static List defaultSources = Arrays.asList( + private static final List defaultSources = Arrays.asList( "resourceTitleObject", "resourceAbstractObject", "resourceType", "resourceDate", "id", "metadataIdentifier", "schema", @@ -43,11 +42,12 @@ public class RecordsEsQueryBuilder { "contact", "contactForResource", "cl_status", "edit", "tag", "changeDate", - "createDate", "mainLanguage", "geom", "formats"); - - private static final String 
SORT_BY_SEPARATOR = ","; - - private static String defaultSpatialOperation = "intersects"; + "createDate", "mainLanguage", "geom", "formats", "resourceTemporalDateRange"); + private static final String defaultSpatialOperation = "intersects"; + @Autowired + QueryToElastic queryToElastic; + @Autowired + private SearchConfiguration configuration; public RecordsEsQueryBuilder(SearchConfiguration configuration) { this.configuration = configuration; @@ -82,18 +82,14 @@ public String buildQuerySingleRecord(String uuid, * Creates a ElasticSearch query from Records API parameters. */ public String buildQuery( - List q, - List externalids, - List bbox, - Integer startIndex, Integer limit, + Query query, String collectionFilter, - List sortBy, Set sourceFields) { SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); - sourceBuilder.from(startIndex).size(limit); + sourceBuilder.from(query.getStartIndex()).size(query.getLimit()); - if (sortBy != null) { - sortBy.forEach(s -> Stream.of(s.split(SORT_BY_SEPARATOR)) + if (query.getSortBy() != null) { + query.getSortBy().forEach(s -> Stream.of(s.split(SORT_BY_SEPARATOR)) .forEach(order -> { boolean isDescending = order.startsWith("-"); sourceBuilder.sort( @@ -114,23 +110,23 @@ public String buildQuery( BoolQueryBuilder boolQuery = QueryBuilders.boolQuery(); QueryBuilder fullTextQuery = - QueryBuilders.queryStringQuery(buildFullTextSearchQuery(q)); + QueryBuilders.queryStringQuery(buildFullTextSearchQuery(query.getQ())); boolQuery.must(fullTextQuery); - if (externalids != null && !externalids.isEmpty()) { + if (query.getExternalIds() != null && !query.getExternalIds().isEmpty()) { boolQuery.must( QueryBuilders.termsQuery( IndexRecordFieldNames.uuid, - externalids)); + query.getExternalIds())); } GeoShapeQueryBuilder geoQuery; - if (bbox != null && bbox.size() == 4) { + if (query.getBbox() != null && query.getBbox().size() == 4) { Rectangle rectangle = new Rectangle( - bbox.get(0).doubleValue(), - bbox.get(2).doubleValue(), - 
bbox.get(3).doubleValue(), - bbox.get(1).doubleValue()); + query.getBbox().get(0).doubleValue(), + query.getBbox().get(2).doubleValue(), + query.getBbox().get(3).doubleValue(), + query.getBbox().get(1).doubleValue()); try { geoQuery = QueryBuilders @@ -153,7 +149,10 @@ public String buildQuery( sourceBuilder.query(boolQuery); sourceBuilder.trackTotalHits(configuration.getTrackTotalHits()); - log.debug("OGC API query: {}", sourceBuilder.toString()); + + queryToElastic.augmentWithQueryables(sourceBuilder, query); + + log.debug("OGC API query: {}", sourceBuilder); return sourceBuilder.toString(); } diff --git a/modules/services/ogc-api-records/src/main/resources/queryables/queryables.json b/modules/services/ogc-api-records/src/main/resources/queryables/queryables.json new file mode 100644 index 00000000..b5d0df88 --- /dev/null +++ b/modules/services/ogc-api-records/src/main/resources/queryables/queryables.json @@ -0,0 +1,277 @@ +// NOTE: lines starting with "//" are pre-processed out of this file so comments are fine. +// +// NOTE: To see how elastic is configured +// go to http://localhost:9200/gn-records This will give you the expanded configuration. +// +// NOTE: To see the actual elastic records go to +// http://localhost:9200/gn-records/_search?pretty=true&q=*:* +// +// NOTE: see core-geonetwork "records.json" for how GN has configured the elastic index +// If you modify this, rebuild GN and then go to admin -> tools -> delete and reindex +// There are cases where you might want to change "type:KEYWORD" to "type:TEXT" +// so you can do full-text searches (KEYWORD is exact match). +// You might want to use the "lowercase_normalizer" for keyword elatic properties so +// searchs can be case-insensitive. +// +// This is the queryables configuration (and OGCAPI endpoint response). +// +// Use this file to configure how elastic-based queries can be configured in an OGCAPI context. +// +// To add a queryable; +// 1. 
create a new property (JsonProperty) in the "properties" list. +// 2. add the "x-gn-elastic" list, with one element for each Elastic column that needs to be queried +// +// For the "x-gn-elastic" (GnElasticInfo): +// +// 1. add an "elasticPath". This is the path to the query property in the elastic document. +// +// You can use "${lang3iso}" to query into one of the multi-language fields. This will +// be replaced with the requested language. +// NOTE: this might not be in the request or the multi-language field in elastic might not +// be filled out. Typically, you might want to also search in "default" or use "lang*". +// +// You can also use "*" to query multiple fields (cf elastic query documentation). +// +// 2. add "elasticColumnType". This is the type of the column. You can get this from the +// elastic index definition (http://localhost:9200/gn-records). See ElasticColumnType enum. +// +// 3. most of the time you will not have to fill in "elasticQueryType" (multi_match is +// the most likely type). However, you might need "nested" - although the only one where this +// is needed is for "contacts". See ElasticQueryType enum. +// +// 4. most of the time you will not need to fill in "elasticColumnFormat". This is mostly for +// a hint to the GUI that a keyword field is case-insensitive (which is very nice for users +// as a search for "GeoCat" will also match "geocat", or "geoCat", etc...). +// +// Please look at the examples, below. +// +// DEBUGGING: +// +// I've found that getting this to work can be a bit tricky. +// +// 1. use http://localhost:9200/gn-records and "records.json" and "index.xsl" in the +// schema-specific section of GN. +// +// 2. Fill in the new property, as above +// +// 3. execute a search request to OGCAPI, adding &property=value +// +// 4. put a break point at the end of RecordsEsQueryBuilder#buildQuery and look at +// sourceBuilder.toString(). This will be a full elastic query. +// +// 5. 
you can save this to a file and then directly execute the request: +// +// curl 'http://localhost:8080/geonetwork/srv/api/search/records/_search' -d "@json2.json" | jq . +// +// Hopefully this will help you debug the query. +// +{ + "type": "object", + "title": "Queryables for GeoNetwork Collection", + "properties": { + "id": { + "title": "id", + "type": "string", + "description": "A unique record identifier assigned by the server.", + "x-ogc-role": "id", + "x-gn-elastic": [ + { + "elasticPath": "uuid", + "elasticColumnType": "KEYWORD" + } + ] + }, + "created": { + "title": "created", + "type": "string", + "description": "The date this record was created in the server.", + "x-gn-elastic": [ + { + "elasticPath": "createDate", + "elasticColumnType": "DATE" + } + ] + }, + "updated": { + "title": "updated", + "type": "string", + "description": "The most recent date on which the record was changed.", + "x-gn-elastic": [ + { + "elasticPath": "resourceDate.date", + "elasticColumnType": "DATE" + } + ] + }, + "language": { + "title": "language", + "type": "string", + "description": "The language used for textual values (i.e. titles, descriptions, etc.) 
of this record.", + "x-gn-elastic": [ + { + "elasticPath": "mainLanguage", + "elasticColumnType": "KEYWORD" + } + ] + }, + "languages": { + "title": "languages", + "type": "string", + "description": "The list of other languages in which this record is available.", + "x-gn-elastic": [ + { + "elasticPath": "otherLanguage", + "elasticColumnType": "KEYWORD" + } + ] + }, + "type": { + "title": "type", + "type": "string", + "description": "The nature or genre of the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "resourceType", + "elasticColumnType": "KEYWORD" + } + ] + }, + "title": { + "title": "title", + "type": "string", + "description": "A human-readable name given to the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "resourceTitleObject.default", + "elasticColumnType": "TEXT" + }, + { + "elasticPath": "resourceTitleObject.lang${lang3iso}", + "elasticColumnType": "TEXT" + } + ] + }, + "description": { + "title": "description", + "type": "string", + "description": "A free-text description of the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "resourceAbstractObject.default", + "elasticColumnType": "TEXT" + }, + { + "elasticPath": "resourceAbstractObject.lang${lang3iso}", + "elasticColumnType": "TEXT" + } + ] + }, + "geometry": { + "title": "geometry", + "type": "string", + "description": "A spatial extent associated with the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "geom", + "elasticColumnType": "GEO" + } + ] + }, + "time": { + "title": "time", + "type": "string", + "description": "A temporal extent associated with the resource described by this record. 
interval-bounded = date-time \"/\" date-time\ninterval-half-bounded-start = [\"..\"] \"/\" date-time\ninterval-half-bounded-end = date-time \"/\" [\"..\"]\ninterval = interval-bounded / interval-half-bounded-start / interval-half-bounded-end\ndatetime = date-time / interval", + "x-gn-elastic": [ + { + "elasticPath": "resourceTemporalDateRange", + "elasticColumnType": "DATERANGE" + } + ] + }, + "keywords": { + "title": "keywords", + "type": "string", + "description": "A list of free-form keywords or tags associated with the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "tag.default", + "elasticColumnType": "KEYWORD", + "elasticColumnFormat": "LOWERCASE" + }, + { + "elasticPath": "tag.lang${lang3iso}", + "elasticColumnType": "KEYWORD", + "elasticColumnFormat": "LOWERCASE" + } + ] + }, + "themes": { + "title": "themes", + "type": "string", + "description": "A knowledge organization system used to classify the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "allKeywords.*.keywords.default", + "elasticColumnType": "TEXT" + }, + { + "elasticPath": "allKeywords.*.keywords.lang${lang3iso}", + "elasticColumnType": "TEXT" + } + ] + }, + "contacts": { + "title": "contacts", + "type": "string", + "description": "A list of contacts qualified by their role(s) in association to the record or the resource described by this record.", + "x-gn-elastic": [ + { + "elasticPath": "contact.organisationObject.default", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.organisationObject.lang${lang3iso}", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.address", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.role", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.email", + "elasticColumnType": "KEYWORD", + "elasticQueryType": "NESTED" + 
}, + { + "elasticPath": "contact.website", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.individual", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.position", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + }, + { + "elasticPath": "contact.phone", + "elasticColumnType": "TEXT", + "elasticQueryType": "NESTED" + } + ] + } + }, + "$schema": "https://json-schema.org/draft/2020-12/schema" +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index bd41b30b..5a4c2877 100644 --- a/pom.xml +++ b/pom.xml @@ -369,7 +369,7 @@ - google_checks.xml + gn_checks.xml true 0 warning