From 9c048a02ffef4b7f3065e1562abfc65496a34ae9 Mon Sep 17 00:00:00 2001 From: Francois Prunayre Date: Tue, 30 Jan 2024 08:23:00 +0100 Subject: [PATCH] Thesaurus / Improve support of EU publication office SKOS format When loading a thesaurus downloaded from the EU publication office, the title and namespace of the thesaurus are not extracted properly. The SKOS format provided has some specificities, e.g. https://op.europa.eu/en/web/eu-vocabularies/dataset/-/resource?uri=http://publications.europa.eu/resource/dataset/data-theme The thesaurus title is stored in various properties, i.e. `at:prefLabel`, `rdfs:label`, `skos:prefLabel`, but none of them were used so far for title extraction. ```xml Data theme Data theme 20220715-0 Data theme ``` This change adds `rdfs:label` and `skos:prefLabel` to the XPath expressions used for title extraction. It also fixes some SonarLint items. Funded by Wallonia region (SPW) --- .../org/fao/geonet/kernel/AllThesaurus.java | 22 +++----- .../java/org/fao/geonet/kernel/Thesaurus.java | 53 ++++++++++--------- .../api/records/formatters/FormatterApi.java | 2 +- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/core/src/main/java/org/fao/geonet/kernel/AllThesaurus.java b/core/src/main/java/org/fao/geonet/kernel/AllThesaurus.java index 971e1c072a0..361c7fc816b 100644 --- a/core/src/main/java/org/fao/geonet/kernel/AllThesaurus.java +++ b/core/src/main/java/org/fao/geonet/kernel/AllThesaurus.java @@ -27,15 +27,13 @@ import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Maps; - -import org.locationtech.jts.util.Assert; - import org.fao.geonet.Constants; import org.fao.geonet.constants.Geonet; import org.fao.geonet.exceptions.TermNotFoundException; import org.fao.geonet.kernel.search.keyword.KeywordRelation; import org.fao.geonet.languages.IsoLanguagesMapper; import org.fao.geonet.utils.Log; +import org.locationtech.jts.util.Assert; import org.openrdf.model.GraphException; import org.openrdf.model.URI; import org.openrdf.sesame.config.AccessDeniedException; 
@@ -46,6 +44,8 @@ import org.openrdf.sesame.repository.local.LocalRepository; import org.springframework.beans.factory.annotation.Autowired; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; @@ -59,9 +59,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - /** * @author Jesse on 2/27/2015. */ @@ -221,8 +218,7 @@ public synchronized URI addElement(KeywordBean keyword) throws IOException, Acce } @Override - public synchronized Thesaurus removeElement(KeywordBean keyword) throws MalformedQueryException, QueryEvaluationException, - IOException, AccessDeniedException { + public synchronized Thesaurus removeElement(KeywordBean keyword) throws AccessDeniedException { throw new UnsupportedOperationException(); } @@ -237,8 +233,7 @@ public synchronized Thesaurus removeElement(String uri) throws AccessDeniedExcep } @Override - public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException, IOException, - MalformedQueryException, QueryEvaluationException, GraphException { + public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException { throw new UnsupportedOperationException(); } @@ -266,12 +261,12 @@ public Thesaurus updateCode(KeywordBean bean, String newcode) throws AccessDenie } @Override - public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException, IOException { + public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException { throw new UnsupportedOperationException(); } @Override - public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException, IOException { + public synchronized Thesaurus updateCodeByURI(String olduri, 
String newuri) throws AccessDeniedException { throw new UnsupportedOperationException(); } @@ -287,8 +282,7 @@ public IsoLanguagesMapper getIsoLanguageMapper() { } @Override - public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException, - IOException, MalformedQueryException, QueryEvaluationException, GraphException { + public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException { throw new UnsupportedOperationException(); } diff --git a/core/src/main/java/org/fao/geonet/kernel/Thesaurus.java b/core/src/main/java/org/fao/geonet/kernel/Thesaurus.java index d9937ed8958..32930d02fa2 100644 --- a/core/src/main/java/org/fao/geonet/kernel/Thesaurus.java +++ b/core/src/main/java/org/fao/geonet/kernel/Thesaurus.java @@ -76,6 +76,7 @@ public class Thesaurus { private static final String DEFAULT_THESAURUS_NAMESPACE = "http://custom.shared.obj.ch/concept#"; private static final String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + private static final String RDF_SCHEMA_NAMESPACE = "http://www.w3.org/2000/01/rdf-schema#"; private static final String SKOS_NAMESPACE = "http://www.w3.org/2004/02/skos/core#"; @@ -360,7 +361,8 @@ public boolean hasConceptScheme(String uri) { try { return performRequest(query).getRowCount() > 0; } catch (Exception e) { - Log.error(Geonet.THESAURUS_MAN, "Error retrieving concept scheme for " + thesaurusFile + ". Error is: " + e.getMessage()); + Log.error(Geonet.THESAURUS_MAN, + String.format("Error retrieving concept scheme for %s. Error is: %s", thesaurusFile, e.getMessage())); throw new RuntimeException(e); } } @@ -380,7 +382,8 @@ public List getConceptSchemes() { } return ret; } catch (Exception e) { - Log.error(Geonet.THESAURUS_MAN, "Error retrieving concept schemes for " + thesaurusFile + ". 
Error is: " + e.getMessage()); + Log.error(Geonet.THESAURUS_MAN, String.format( + "Error retrieving concept schemes for %s. Error is: %s", thesaurusFile, e.getMessage())); return Collections.emptyList(); } } @@ -452,8 +455,7 @@ public synchronized URI addElement(KeywordBean keyword) throws IOException, Acce /** * Remove keyword from thesaurus. */ - public synchronized Thesaurus removeElement(KeywordBean keyword) throws MalformedQueryException, - QueryEvaluationException, IOException, AccessDeniedException { + public synchronized Thesaurus removeElement(KeywordBean keyword) throws AccessDeniedException { String namespace = keyword.getNameSpaceCode(); String code = keyword.getRelativeCode(); @@ -518,8 +520,7 @@ private String toiso639_1_Lang(String lang) { * languages) and the coordinates will only be updated if they are non-empty * strings. */ - public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException, IOException, - MalformedQueryException, QueryEvaluationException, GraphException { + public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException { THESAURUS_SEARCH_CACHE.invalidateAll(); // Get thesaurus graph @@ -661,7 +662,7 @@ public Thesaurus updateCode(KeywordBean bean, String newcode) throws AccessDenie * Update concept code by creating URI from namespace and code. This is recommended when * thesaurus concept identifiers contains # eg. http://vocab.nerc.ac.uk/collection/P07/current#CFV13N44 */ - public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException, IOException { + public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException { Graph myGraph = repository.getGraph(); ValueFactory myFactory = myGraph.getValueFactory(); @@ -679,7 +680,7 @@ public synchronized Thesaurus updateCode(String namespace, String oldcode, Strin * * eg. 
http://vocab.nerc.ac.uk/collection/P07/current/CFV13N44/ */ - public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException, IOException { + public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException { Graph myGraph = repository.getGraph(); ValueFactory myFactory = myGraph.getValueFactory(); @@ -894,7 +895,11 @@ private void retrieveDublinCore(Element thesaurusEl) { // } private void retrieveMultiLingualTitles(Element thesaurusEl) { try { - String xpathTitles = "skos:ConceptScheme/dc:title[@xml:lang]|skos:ConceptScheme/dcterms:title[@xml:lang]|rdf:Description[rdf:type/@rdf:resource = 'http://www.w3.org/2004/02/skos/core#ConceptScheme']/dc:title[@xml:lang]"; + String xpathTitles = "skos:ConceptScheme/dc:title[@xml:lang]" + + "|skos:ConceptScheme/dcterms:title[@xml:lang]" + + "|skos:ConceptScheme/rdfs:label[@xml:lang]" + + "|skos:ConceptScheme/skos:prefLabel[@xml:lang]" + + "|rdf:Description[rdf:type/@rdf:resource = 'http://www.w3.org/2004/02/skos/core#ConceptScheme']/dc:title[@xml:lang]"; multilingualTitles.clear(); multilingualTitles.putAll(retrieveMultilingualField(thesaurusEl, xpathTitles)); } catch (Exception e) { @@ -944,25 +949,23 @@ private void retrieveThesaurusInformation(Path thesaurusFile, String defaultTitl try { Element thesaurusEl = Xml.loadFile(thesaurusFile); - List theNSs = new ArrayList<>(); - Namespace rdfNamespace = Namespace.getNamespace("rdf", RDF_NAMESPACE); - theNSs.add(rdfNamespace); - theNSs.add(Namespace.getNamespace("skos", SKOS_NAMESPACE)); - theNSs.add(Namespace.getNamespace("dc", DC_NAMESPACE)); - theNSs.add(Namespace.getNamespace("dcterms", DCTERMS_NAMESPACE)); + List theNSs = getThesaurusNamespaces(); this.defaultNamespace = null; retrieveMultiLingualTitles(thesaurusEl); retrieveDublinCore(thesaurusEl); Element titleEl = Xml.selectElement(thesaurusEl, - "skos:ConceptScheme/dc:title|skos:ConceptScheme/dcterms:title|" + - 
"skos:Collection/dc:title|skos:Collection/dcterms:title|" + - "rdf:Description/dc:title|rdf:Description/dcterms:title", theNSs); + "skos:ConceptScheme/dc:title|skos:ConceptScheme/dcterms:title" + + "|skos:ConceptScheme/rdfs:label|skos:ConceptScheme/skos:prefLabel" + + "|skos:Collection/dc:title|skos:Collection/dcterms:title" + + "|rdf:Description/dc:title|rdf:Description/dcterms:title", getThesaurusNamespaces()); if (titleEl != null) { this.title = titleEl.getValue(); - this.defaultNamespace = titleEl.getParentElement().getAttributeValue("about", rdfNamespace); + this.defaultNamespace = titleEl + .getParentElement() + .getAttributeValue("about", Namespace.getNamespace("rdf", RDF_NAMESPACE)); } else { this.title = defaultTitle; this.defaultNamespace = DEFAULT_THESAURUS_NAMESPACE; @@ -1027,11 +1030,13 @@ private void retrieveThesaurusInformation(Path thesaurusFile, String defaultTitl } if (Log.isDebugEnabled(Geonet.THESAURUS_MAN)) { - Log.debug(Geonet.THESAURUS_MAN, "Thesaurus information: " + this.title + " (" + this.date + ")"); + Log.debug(Geonet.THESAURUS_MAN, String.format( + "Thesaurus information: %s (%s)", this.title, this.date)); } } catch (Exception ex) { if (!ignoreMissingError) - Log.error(Geonet.THESAURUS_MAN, "Error getting thesaurus info for " + thesaurusFile + ". Error is: " + ex.getMessage()); + Log.error(Geonet.THESAURUS_MAN, String.format( + "Error getting thesaurus info for %s. 
Error is: %s", thesaurusFile, ex.getMessage())); } } @@ -1102,8 +1107,7 @@ public IsoLanguagesMapper getIsoLanguageMapper() { * @param subject the keyword that is related to the other keyword * @param related the relation between the two keywords */ - public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException, IOException, - MalformedQueryException, QueryEvaluationException, GraphException { + public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException { THESAURUS_SEARCH_CACHE.invalidateAll(); Graph myGraph = repository.getGraph(); @@ -1126,7 +1130,7 @@ public synchronized void addRelation(String subject, KeywordRelation related, St * @return keyword */ public KeywordBean getKeyword(String uri, String... languages) { - String cacheKey = "getKeyword" + uri + Arrays.stream(languages).collect(Collectors.joining("")); + String cacheKey = "getKeyword" + uri + String.join("", languages); Object cacheValue = THESAURUS_SEARCH_CACHE.getIfPresent(cacheKey); if (cacheValue != null) { return (KeywordBean) cacheValue; @@ -1370,6 +1374,7 @@ private ArrayList classifyTermWithNoBroaderTerms(KeywordBean term) private List getThesaurusNamespaces() { List theNSs = new ArrayList<>(); theNSs.add(Namespace.getNamespace("rdf", RDF_NAMESPACE)); + theNSs.add(Namespace.getNamespace("rdfs", RDF_SCHEMA_NAMESPACE)); theNSs.add(Namespace.getNamespace("skos", SKOS_NAMESPACE)); theNSs.add(Namespace.getNamespace("dc", DC_NAMESPACE)); theNSs.add(Namespace.getNamespace("dcterms", DCTERMS_NAMESPACE)); diff --git a/services/src/main/java/org/fao/geonet/api/records/formatters/FormatterApi.java b/services/src/main/java/org/fao/geonet/api/records/formatters/FormatterApi.java index 4a09f7162c7..1eda106b4e6 100644 --- a/services/src/main/java/org/fao/geonet/api/records/formatters/FormatterApi.java +++ 
b/services/src/main/java/org/fao/geonet/api/records/formatters/FormatterApi.java @@ -229,7 +229,7 @@ public void getRecordFormattedBy( // if text/html > xsl_view // if application/pdf > xsl_view and PDF output // if application/x-gn-+(xml|html|pdf|text) - // Force PDF ouutput when URL parameter is set. + // Force PDF output when URL parameter is set. // This is useful when making GET link to PDF which // can not use headers. if (MediaType.ALL_VALUE.equals(acceptHeader)) {