Skip to content

Commit

Permalink
Thesaurus / Improve support of EU publication office SKOS format
Browse files Browse the repository at this point in the history
When loading a thesaurus downloaded from the EU Publications Office, the title and namespace of the thesaurus are not extracted properly.

The SKOS format provided by the EU Publications Office has some specificities,

eg. https://op.europa.eu/en/web/eu-vocabularies/dataset/-/resource?uri=http://publications.europa.eu/resource/dataset/data-theme

The thesaurus title is stored in various properties, i.e. `at:prefLabel`, `rdfs:label` and `skos:prefLabel`, but none of them has been used so far for title extraction.
```xml
   <skos:ConceptScheme rdf:about="http://publications.europa.eu/resource/authority/data-theme"
                       at:table.id="data-theme"
                       at:table.version.number="20220715-0">
      <at:prefLabel xml:lang="en">Data theme</at:prefLabel>
      <rdfs:label xml:lang="en">Data theme</rdfs:label>
      <owl:versionInfo>20220715-0</owl:versionInfo>
      <skos:prefLabel xml:lang="en">Data theme</skos:prefLabel>
   </skos:ConceptScheme>
```

This change adds `rdfs:label` and `skos:prefLabel` to the XPath expressions used for title extraction.

Also fixes some SonarLint issues.

Funded by Wallonia region (SPW)
  • Loading branch information
fxprunayre committed Jan 30, 2024
1 parent 988355d commit 9c048a0
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 39 deletions.
22 changes: 8 additions & 14 deletions core/src/main/java/org/fao/geonet/kernel/AllThesaurus.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,13 @@
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.locationtech.jts.util.Assert;

import org.fao.geonet.Constants;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.exceptions.TermNotFoundException;
import org.fao.geonet.kernel.search.keyword.KeywordRelation;
import org.fao.geonet.languages.IsoLanguagesMapper;
import org.fao.geonet.utils.Log;
import org.locationtech.jts.util.Assert;
import org.openrdf.model.GraphException;
import org.openrdf.model.URI;
import org.openrdf.sesame.config.AccessDeniedException;
Expand All @@ -46,6 +44,8 @@
import org.openrdf.sesame.repository.local.LocalRepository;
import org.springframework.beans.factory.annotation.Autowired;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
Expand All @@ -59,9 +59,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
* @author Jesse on 2/27/2015.
*/
Expand Down Expand Up @@ -221,8 +218,7 @@ public synchronized URI addElement(KeywordBean keyword) throws IOException, Acce
}

@Override
public synchronized Thesaurus removeElement(KeywordBean keyword) throws MalformedQueryException, QueryEvaluationException,
IOException, AccessDeniedException {
public synchronized Thesaurus removeElement(KeywordBean keyword) throws AccessDeniedException {
throw new UnsupportedOperationException();
}

Expand All @@ -237,8 +233,7 @@ public synchronized Thesaurus removeElement(String uri) throws AccessDeniedExcep
}

@Override
public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException, IOException,
MalformedQueryException, QueryEvaluationException, GraphException {
public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException {
throw new UnsupportedOperationException();
}

Expand Down Expand Up @@ -266,12 +261,12 @@ public Thesaurus updateCode(KeywordBean bean, String newcode) throws AccessDenie
}

@Override
public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException, IOException {
public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException {
throw new UnsupportedOperationException();
}

@Override
public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException, IOException {
public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException {
throw new UnsupportedOperationException();
}

Expand All @@ -287,8 +282,7 @@ public IsoLanguagesMapper getIsoLanguageMapper() {
}

@Override
public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException,
IOException, MalformedQueryException, QueryEvaluationException, GraphException {
public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException {
throw new UnsupportedOperationException();
}

Expand Down
53 changes: 29 additions & 24 deletions core/src/main/java/org/fao/geonet/kernel/Thesaurus.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public class Thesaurus {
private static final String DEFAULT_THESAURUS_NAMESPACE = "http://custom.shared.obj.ch/concept#";

private static final String RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
private static final String RDF_SCHEMA_NAMESPACE = "http://www.w3.org/2000/01/rdf-schema#";

private static final String SKOS_NAMESPACE = "http://www.w3.org/2004/02/skos/core#";

Expand Down Expand Up @@ -360,7 +361,8 @@ public boolean hasConceptScheme(String uri) {
try {
return performRequest(query).getRowCount() > 0;
} catch (Exception e) {
Log.error(Geonet.THESAURUS_MAN, "Error retrieving concept scheme for " + thesaurusFile + ". Error is: " + e.getMessage());
Log.error(Geonet.THESAURUS_MAN,
String.format("Error retrieving concept scheme for %s. Error is: %s", thesaurusFile, e.getMessage()));
throw new RuntimeException(e);
}
}
Expand All @@ -380,7 +382,8 @@ public List<String> getConceptSchemes() {
}
return ret;
} catch (Exception e) {
Log.error(Geonet.THESAURUS_MAN, "Error retrieving concept schemes for " + thesaurusFile + ". Error is: " + e.getMessage());
Log.error(Geonet.THESAURUS_MAN, String.format(
"Error retrieving concept schemes for %s. Error is: %s", thesaurusFile, e.getMessage()));
return Collections.emptyList();
}
}
Expand Down Expand Up @@ -452,8 +455,7 @@ public synchronized URI addElement(KeywordBean keyword) throws IOException, Acce
/**
* Remove keyword from thesaurus.
*/
public synchronized Thesaurus removeElement(KeywordBean keyword) throws MalformedQueryException,
QueryEvaluationException, IOException, AccessDeniedException {
public synchronized Thesaurus removeElement(KeywordBean keyword) throws AccessDeniedException {
String namespace = keyword.getNameSpaceCode();
String code = keyword.getRelativeCode();

Expand Down Expand Up @@ -518,8 +520,7 @@ private String toiso639_1_Lang(String lang) {
* languages) and the coordinates will only be updated if they are non-empty
* strings.
*/
public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException, IOException,
MalformedQueryException, QueryEvaluationException, GraphException {
public synchronized URI updateElement(KeywordBean keyword, boolean replace) throws AccessDeniedException {
THESAURUS_SEARCH_CACHE.invalidateAll();

// Get thesaurus graph
Expand Down Expand Up @@ -661,7 +662,7 @@ public Thesaurus updateCode(KeywordBean bean, String newcode) throws AccessDenie
* Update concept code by creating URI from namespace and code. This is recommended when
* thesaurus concept identifiers contains # eg. http://vocab.nerc.ac.uk/collection/P07/current#CFV13N44
*/
public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException, IOException {
public synchronized Thesaurus updateCode(String namespace, String oldcode, String newcode) throws AccessDeniedException {
Graph myGraph = repository.getGraph();

ValueFactory myFactory = myGraph.getValueFactory();
Expand All @@ -679,7 +680,7 @@ public synchronized Thesaurus updateCode(String namespace, String oldcode, Strin
*
* eg. http://vocab.nerc.ac.uk/collection/P07/current/CFV13N44/
*/
public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException, IOException {
public synchronized Thesaurus updateCodeByURI(String olduri, String newuri) throws AccessDeniedException {
Graph myGraph = repository.getGraph();

ValueFactory myFactory = myGraph.getValueFactory();
Expand Down Expand Up @@ -894,7 +895,11 @@ private void retrieveDublinCore(Element thesaurusEl) {
// }
private void retrieveMultiLingualTitles(Element thesaurusEl) {
try {
String xpathTitles = "skos:ConceptScheme/dc:title[@xml:lang]|skos:ConceptScheme/dcterms:title[@xml:lang]|rdf:Description[rdf:type/@rdf:resource = 'http://www.w3.org/2004/02/skos/core#ConceptScheme']/dc:title[@xml:lang]";
String xpathTitles = "skos:ConceptScheme/dc:title[@xml:lang]" +
"|skos:ConceptScheme/dcterms:title[@xml:lang]" +
"|skos:ConceptScheme/rdfs:label[@xml:lang]" +
"|skos:ConceptScheme/skos:prefLabel[@xml:lang]" +
"|rdf:Description[rdf:type/@rdf:resource = 'http://www.w3.org/2004/02/skos/core#ConceptScheme']/dc:title[@xml:lang]";
multilingualTitles.clear();
multilingualTitles.putAll(retrieveMultilingualField(thesaurusEl, xpathTitles));
} catch (Exception e) {
Expand Down Expand Up @@ -944,25 +949,23 @@ private void retrieveThesaurusInformation(Path thesaurusFile, String defaultTitl
try {
Element thesaurusEl = Xml.loadFile(thesaurusFile);

List<Namespace> theNSs = new ArrayList<>();
Namespace rdfNamespace = Namespace.getNamespace("rdf", RDF_NAMESPACE);
theNSs.add(rdfNamespace);
theNSs.add(Namespace.getNamespace("skos", SKOS_NAMESPACE));
theNSs.add(Namespace.getNamespace("dc", DC_NAMESPACE));
theNSs.add(Namespace.getNamespace("dcterms", DCTERMS_NAMESPACE));
List<Namespace> theNSs = getThesaurusNamespaces();

this.defaultNamespace = null;
retrieveMultiLingualTitles(thesaurusEl);
retrieveDublinCore(thesaurusEl);

Element titleEl = Xml.selectElement(thesaurusEl,
"skos:ConceptScheme/dc:title|skos:ConceptScheme/dcterms:title|" +
"skos:Collection/dc:title|skos:Collection/dcterms:title|" +
"rdf:Description/dc:title|rdf:Description/dcterms:title", theNSs);
"skos:ConceptScheme/dc:title|skos:ConceptScheme/dcterms:title" +
"|skos:ConceptScheme/rdfs:label|skos:ConceptScheme/skos:prefLabel" +
"|skos:Collection/dc:title|skos:Collection/dcterms:title" +
"|rdf:Description/dc:title|rdf:Description/dcterms:title", getThesaurusNamespaces());

if (titleEl != null) {
this.title = titleEl.getValue();
this.defaultNamespace = titleEl.getParentElement().getAttributeValue("about", rdfNamespace);
this.defaultNamespace = titleEl
.getParentElement()
.getAttributeValue("about", Namespace.getNamespace("rdf", RDF_NAMESPACE));
} else {
this.title = defaultTitle;
this.defaultNamespace = DEFAULT_THESAURUS_NAMESPACE;
Expand Down Expand Up @@ -1027,11 +1030,13 @@ private void retrieveThesaurusInformation(Path thesaurusFile, String defaultTitl
}

if (Log.isDebugEnabled(Geonet.THESAURUS_MAN)) {
Log.debug(Geonet.THESAURUS_MAN, "Thesaurus information: " + this.title + " (" + this.date + ")");
Log.debug(Geonet.THESAURUS_MAN, String.format(
"Thesaurus information: %s (%s)", this.title, this.date));
}
} catch (Exception ex) {
if (!ignoreMissingError)
Log.error(Geonet.THESAURUS_MAN, "Error getting thesaurus info for " + thesaurusFile + ". Error is: " + ex.getMessage());
Log.error(Geonet.THESAURUS_MAN, String.format(
"Error getting thesaurus info for %s. Error is: %s", thesaurusFile, ex.getMessage()));
}
}

Expand Down Expand Up @@ -1102,8 +1107,7 @@ public IsoLanguagesMapper getIsoLanguageMapper() {
* @param subject the keyword that is related to the other keyword
* @param related the relation between the two keywords
*/
public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException, IOException,
MalformedQueryException, QueryEvaluationException, GraphException {
public synchronized void addRelation(String subject, KeywordRelation related, String relatedSubject) throws AccessDeniedException {
THESAURUS_SEARCH_CACHE.invalidateAll();

Graph myGraph = repository.getGraph();
Expand All @@ -1126,7 +1130,7 @@ public synchronized void addRelation(String subject, KeywordRelation related, St
* @return keyword
*/
public KeywordBean getKeyword(String uri, String... languages) {
String cacheKey = "getKeyword" + uri + Arrays.stream(languages).collect(Collectors.joining(""));
String cacheKey = "getKeyword" + uri + String.join("", languages);
Object cacheValue = THESAURUS_SEARCH_CACHE.getIfPresent(cacheKey);
if (cacheValue != null) {
return (KeywordBean) cacheValue;
Expand Down Expand Up @@ -1370,6 +1374,7 @@ private ArrayList <KeywordBean> classifyTermWithNoBroaderTerms(KeywordBean term)
private List<Namespace> getThesaurusNamespaces() {
List<Namespace> theNSs = new ArrayList<>();
theNSs.add(Namespace.getNamespace("rdf", RDF_NAMESPACE));
theNSs.add(Namespace.getNamespace("rdfs", RDF_SCHEMA_NAMESPACE));
theNSs.add(Namespace.getNamespace("skos", SKOS_NAMESPACE));
theNSs.add(Namespace.getNamespace("dc", DC_NAMESPACE));
theNSs.add(Namespace.getNamespace("dcterms", DCTERMS_NAMESPACE));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ public void getRecordFormattedBy(
// if text/html > xsl_view
// if application/pdf > xsl_view and PDF output
// if application/x-gn-<formatterId>+(xml|html|pdf|text)
// Force PDF ouutput when URL parameter is set.
// Force PDF output when URL parameter is set.
// This is useful when making GET link to PDF which
// can not use headers.
if (MediaType.ALL_VALUE.equals(acceptHeader)) {
Expand Down

0 comments on commit 9c048a0

Please sign in to comment.