Commit
Merge branch 'development' into lukaskabc/Enhancement-520
# Conflicts:
#	src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java
#	src/test/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDaoTest.java
#	src/test/java/cz/cvut/kbss/termit/rest/VocabularyControllerTest.java
lukaskabc committed Nov 24, 2024
2 parents ab07485 + 8315b90 commit ffad638
Showing 32 changed files with 695 additions and 228 deletions.
1 change: 1 addition & 0 deletions doc/setup.md
@@ -39,6 +39,7 @@ by the application:
* `lucene` - decides whether Lucene text indexing is enabled and should be used in full text search queries.
* `admin-registration-only` - decides whether new users can be registered only by application admin, or whether anyone can register.
* `no-cache` - disables Ehcache, which is used to cache lists of resources and vocabularies for faster retrieval, and persistence cache.
* `development` - indicates that the application is running in development. This, for example, means that a mail server does not need to be configured.

The `lucene` Spring profile is activated automatically by the `graphdb` Maven profile. `admin-registration-only` and `no-cache` have to be added
either in `application.yml` directly, or one can pass the parameter to the Maven build, e.g.:
14 changes: 14 additions & 0 deletions src/main/java/cz/cvut/kbss/termit/exception/UnsupportedTextAnalysisLanguageException.java
@@ -0,0 +1,14 @@
package cz.cvut.kbss.termit.exception;

import cz.cvut.kbss.termit.model.Asset;
import cz.cvut.kbss.termit.model.resource.File;

/**
* Indicates that a language is not supported by the text analysis service.
*/
public class UnsupportedTextAnalysisLanguageException extends TermItException {

public UnsupportedTextAnalysisLanguageException(String message, Asset<?> asset) {
super(message, asset instanceof File ? "error.annotation.file.unsupportedLanguage" : "error.annotation.term.unsupportedLanguage");
}
}
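
A hypothetical usage sketch of the new exception: a pre-check a text analysis service might perform before analysing a file. The exception class and `File#getLanguage()` come from this commit; the `TextAnalysisLanguageCheck` class, the `verifyLanguageSupported` method, and the supported-language set are illustrative assumptions, not part of the diff.

```java
import cz.cvut.kbss.termit.exception.UnsupportedTextAnalysisLanguageException;
import cz.cvut.kbss.termit.model.resource.File;

import java.util.Set;

class TextAnalysisLanguageCheck {

    // Assumed set of languages supported by the text analysis service
    private final Set<String> supportedLanguages = Set.of("cs", "en");

    void verifyLanguageSupported(File file) {
        final String lang = file.getLanguage();
        if (lang != null && !supportedLanguages.contains(lang)) {
            // For a File asset the exception resolves the message id
            // error.annotation.file.unsupportedLanguage
            throw new UnsupportedTextAnalysisLanguageException(
                    "Text analysis service does not support language " + lang, file);
        }
    }
}
```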
22 changes: 19 additions & 3 deletions src/main/java/cz/cvut/kbss/termit/model/TextAnalysisRecord.java
@@ -17,10 +17,12 @@
*/
package cz.cvut.kbss.termit.model;

import cz.cvut.kbss.jopa.model.annotations.OWLAnnotationProperty;
import cz.cvut.kbss.jopa.model.annotations.OWLClass;
import cz.cvut.kbss.jopa.model.annotations.OWLDataProperty;
import cz.cvut.kbss.jopa.model.annotations.OWLObjectProperty;
import cz.cvut.kbss.jopa.model.annotations.ParticipationConstraints;
import cz.cvut.kbss.jopa.vocabulary.DC;
import cz.cvut.kbss.termit.model.resource.Resource;
import cz.cvut.kbss.termit.util.Vocabulary;

@@ -44,12 +46,16 @@ public class TextAnalysisRecord extends AbstractEntity {
@OWLObjectProperty(iri = Vocabulary.s_p_ma_slovnik_pro_analyzu)
private Set<URI> vocabularies;

@OWLAnnotationProperty(iri = DC.Terms.LANGUAGE, simpleLiteral = true)
private String language;

public TextAnalysisRecord() {
}

public TextAnalysisRecord(Instant date, Resource analyzedResource) {
public TextAnalysisRecord(Instant date, Resource analyzedResource, String language) {
this.date = date;
this.analyzedResource = analyzedResource;
this.language = language;
}

public Instant getDate() {
@@ -76,6 +82,14 @@ public void setVocabularies(Set<URI> vocabularies) {
this.vocabularies = vocabularies;
}

public String getLanguage() {
return language;
}

public void setLanguage(String language) {
this.language = language;
}

@Override
public boolean equals(Object o) {
if (this == o) {
@@ -86,12 +100,13 @@ public boolean equals(Object o) {
}
return Objects.equals(date, that.date) &&
Objects.equals(analyzedResource, that.analyzedResource) &&
Objects.equals(vocabularies, that.vocabularies);
Objects.equals(vocabularies, that.vocabularies) &&
Objects.equals(language, that.language);
}

@Override
public int hashCode() {
return Objects.hash(date, analyzedResource, vocabularies);
return Objects.hash(date, analyzedResource, vocabularies, language);
}

@Override
@@ -100,6 +115,7 @@ public String toString() {
"date=" + date +
",analyzedResource=" + analyzedResource +
",vocabularies=" + vocabularies +
", language=" + language +
"}";
}
}
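
A minimal sketch (not part of the diff) of how the extended constructor carries the language of the analysed content alongside the resource and timestamp. The `TextAnalysisRecord` constructor and `setVocabularies` are taken from the class above; the wrapper class and the "cs" language tag are assumptions for illustration.

```java
import cz.cvut.kbss.termit.model.TextAnalysisRecord;
import cz.cvut.kbss.termit.model.resource.File;

import java.time.Instant;
import java.util.Set;

class TextAnalysisRecordExample {

    TextAnalysisRecord createRecord(File analyzedFile) {
        // The language tag is now stored with the record (dcterms:language, simple literal)
        final TextAnalysisRecord record = new TextAnalysisRecord(Instant.now(), analyzedFile, "cs");
        record.setVocabularies(Set.of());   // vocabularies used during the analysis
        return record;
    }
}
```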
31 changes: 15 additions & 16 deletions src/main/java/cz/cvut/kbss/termit/model/resource/File.java
@@ -21,16 +21,16 @@
import com.fasterxml.jackson.annotation.JsonIgnore;
import cz.cvut.kbss.jopa.model.annotations.FetchType;
import cz.cvut.kbss.jopa.model.annotations.Inferred;
import cz.cvut.kbss.jopa.model.annotations.OWLAnnotationProperty;
import cz.cvut.kbss.jopa.model.annotations.OWLClass;
import cz.cvut.kbss.jopa.model.annotations.OWLObjectProperty;
import cz.cvut.kbss.jopa.model.annotations.Types;
import cz.cvut.kbss.jopa.vocabulary.DC;
import cz.cvut.kbss.jsonld.annotation.JsonLdAttributeOrder;
import cz.cvut.kbss.termit.exception.TermItException;
import cz.cvut.kbss.termit.model.util.SupportsStorage;
import cz.cvut.kbss.termit.service.IdentifierResolver;
import cz.cvut.kbss.termit.util.Vocabulary;

import java.lang.reflect.Field;
import java.util.Objects;
import java.util.Set;

@@ -43,6 +43,9 @@ public class File extends Resource implements SupportsStorage {
@OWLObjectProperty(iri = Vocabulary.s_p_je_casti_dokumentu, fetch = FetchType.EAGER)
private Document document;

@OWLAnnotationProperty(iri = DC.Terms.LANGUAGE, simpleLiteral = true)
private String language;

@Types
private Set<String> types;

@@ -54,6 +57,14 @@ public void setDocument(Document document) {
this.document = document;
}

public String getLanguage() {
return language;
}

public void setLanguage(String language) {
this.language = language;
}

public Set<String> getTypes() {
return types;
}
@@ -73,15 +84,11 @@ public boolean equals(Object o) {
return Objects.equals(getUri(), file.getUri());
}

@Override
public int hashCode() {
return Objects.hash(getUri());
}

@Override
public String toString() {
return "File{" +
super.toString() + (document != null ? "document=<" + document.getUri() + ">" : "") + '}';
super.toString() + (language != null ? "@" + language : "") +
(document != null ? "document=<" + document.getUri() + ">" : "") + '}';
}

/**
@@ -109,12 +116,4 @@ public String getDirectoryName() {
return IdentifierResolver.normalizeToAscii(labelPart) + '_' + getUri().hashCode();
}
}

public static Field getDocumentField() {
try {
return File.class.getDeclaredField("document");
} catch (NoSuchFieldException e) {
throw new TermItException("Fatal error! Unable to retrieve \"document\" field.", e);
}
}
}
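
A short sketch of the new `dcterms:language` attribute on `File` introduced by this commit. The getter/setter are from the diff above; the example class and the "cs" tag are illustrative only.

```java
import cz.cvut.kbss.termit.model.resource.File;

class FileLanguageExample {

    void tagFileLanguage(File file) {
        file.setLanguage("cs");   // stored as a simple literal of dcterms:language
        assert "cs".equals(file.getLanguage());
        // toString() now also renders the tag, e.g. "File{...@cs ...}"
    }
}
```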
138 changes: 85 additions & 53 deletions src/main/java/cz/cvut/kbss/termit/persistence/dao/VocabularyDao.java
@@ -49,7 +49,7 @@
import cz.cvut.kbss.termit.service.snapshot.SnapshotProvider;
import cz.cvut.kbss.termit.util.Configuration;
import cz.cvut.kbss.termit.util.Utils;
import cz.cvut.kbss.termit.util.throttle.CacheableFuture;
import cz.cvut.kbss.termit.util.throttle.ThrottledFuture;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
@@ -223,10 +226,13 @@ public Vocabulary update(Vocabulary entity) {
/**
* Forcefully removes the specified vocabulary.
* <p>
* This deletes the whole graph of the vocabulary, all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution
* should be exercised when using this method. All relevant data, including documents and files, will be dropped.
* This deletes the whole graph of the vocabulary, all terms in the vocabulary's glossary and then removes the
* vocabulary itself. Extreme caution should be exercised when using this method. All relevant data, including
* documents and files, will be dropped.
* <p>
* Publishes {@link VocabularyWillBeRemovedEvent} before the actual removal to allow other services to clean up related resources (e.g., delete the document).
* Publishes {@link VocabularyWillBeRemovedEvent} before the actual removal to allow other services to clean up
* related resources (e.g., delete the document).
*
* @param entity The vocabulary to delete
*/
@ModifiesData
@@ -242,9 +245,9 @@ public void remove(Vocabulary entity) {
* <p>
* Forcefully removes the specified vocabulary.
* <p>
* This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself.
* Extreme caution should be exercised when using this method,
* as it does not check for any references or usage and just drops all the relevant data.
* This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution
* should be exercised when using this method, as it does not check for any references or usage and just drops all
* the relevant data.
* <p>
* The document is not removed.
*/
@@ -254,27 +257,27 @@ public void removeVocabularyKeepDocument(Vocabulary entity) {

/**
* <p>
* Does not publish the {@link VocabularyWillBeRemovedEvent}.<br>
* You should use {@link #remove(Vocabulary)} instead.
* Does not publish the {@link VocabularyWillBeRemovedEvent}.<br> You should use {@link #remove(Vocabulary)}
* instead.
* <p>
* Forcefully removes the specified vocabulary.
* <p>
* This deletes all terms in the vocabulary's glossary and then removes the vocabulary itself. Extreme caution
* should be exercised when using this method, as it does not check for any references or usage and just drops all
* the relevant data.
* @param entity The vocabulary to delete
* @param dropGraph if false,
* executes {@code src/main/resources/query/remove/removeGlossaryTerms.ru} removing terms,
* their relations, model, glossary and vocabulary itself, keeps the document.
* When true, the whole vocabulary graph is dropped.
*
* @param entity The vocabulary to delete
* @param dropGraph if false, executes {@code src/main/resources/query/remove/removeGlossaryTerms.ru} removing
* terms, their relations, model, glossary and vocabulary itself, keeps the document. When true,
* the whole vocabulary graph is dropped.
*/
private void removeVocabulary(Vocabulary entity, boolean dropGraph) {
Objects.requireNonNull(entity);
LOG.debug("Forcefully removing vocabulary {} and all its contents.", entity);
try {
final URI vocabularyContext = contextMapper.getVocabularyContext(entity.getUri());

if(dropGraph) {
if (dropGraph) {
// drops whole named graph
em.createNativeQuery("DROP GRAPH ?context")
.setParameter("context", vocabularyContext)
@@ -323,8 +326,8 @@ public Optional<Glossary> findGlossary(URI uri) {
}

/**
* Checks whether terms from the {@code subjectVocabulary} reference (as parent terms) any terms from the {@code
* targetVocabulary}.
* Checks whether terms from the {@code subjectVocabulary} reference (as parent terms) any terms from the
* {@code targetVocabulary}.
*
* @param subjectVocabulary Subject vocabulary identifier
* @param targetVocabulary Target vocabulary identifier
@@ -367,7 +370,7 @@ public void refreshLastModified(RefreshLastModifiedEvent event) {
}

@Transactional
public CacheableFuture<Collection<ValidationResult>> validateContents(URI vocabulary) {
public ThrottledFuture<Collection<ValidationResult>> validateContents(URI vocabulary) {
final VocabularyContentValidator validator = context.getBean(VocabularyContentValidator.class);
final Collection<URI> importClosure = getTransitivelyImportedVocabularies(vocabulary);
importClosure.add(vocabulary);
@@ -403,7 +406,7 @@ public List<AggregatedChangeInfo> getChangesOfContent(Vocabulary vocabulary) {
* Gets content change records of the specified vocabulary.
*
* @param vocabulary Vocabulary whose content changes to get
* @param pageReq Specification of the size and number of the page to return
* @param pageReq Specification of the size and number of the page to return
* @return List of change records, ordered by date in descending order
*/
public List<AbstractChangeRecord> getDetailedHistoryOfContent(Vocabulary vocabulary, ChangeRecordFilterDto filter, Pageable pageReq) {
@@ -561,16 +564,17 @@ public List<RdfsStatement> getVocabularyRelations(Vocabulary vocabulary, Collect

try {
return em.createNativeQuery("""
SELECT DISTINCT ?object ?relation ?subject {
?object a ?vocabularyType ;
?relation ?subject .
FILTER(?object != ?subject) .
FILTER(?relation NOT IN (?excluded)) .
} ORDER BY ?object ?relation
""", "RDFStatement")
SELECT DISTINCT ?object ?relation ?subject {
?object a ?vocabularyType ;
?relation ?subject .
FILTER(?object != ?subject) .
FILTER(?relation NOT IN (?excluded)) .
} ORDER BY ?object ?relation
""", "RDFStatement")
.setParameter("subject", vocabularyUri)
.setParameter("excluded", excludedRelations)
.setParameter("vocabularyType", URI.create(EntityToOwlClassMapper.getOwlClassForEntity(Vocabulary.class)))
.setParameter("excluded", excludedRelations)
.setParameter("vocabularyType",
URI.create(EntityToOwlClassMapper.getOwlClassForEntity(Vocabulary.class)))
.getResultList();
} catch (RuntimeException e) {
throw new PersistenceException(e);
@@ -588,31 +592,31 @@ public List<RdfsStatement> getTermRelations(Vocabulary vocabulary) {

try {
return em.createNativeQuery("""
SELECT DISTINCT ?object ?relation ?subject WHERE {
?term a ?termType;
?inVocabulary ?vocabulary .
{
?term ?relation ?secondTerm .
?secondTerm a ?termType;
?inVocabulary ?secondVocabulary .
BIND(?term as ?object)
BIND(?secondTerm as ?subject)
} UNION {
?secondTerm ?relation ?term .
?secondTerm a ?termType;
?inVocabulary ?secondVocabulary .
BIND(?secondTerm as ?object)
BIND(?term as ?subject)
}
FILTER(?relation IN (?deniedRelations))
FILTER(?object != ?subject)
FILTER(?secondVocabulary != ?vocabulary)
} ORDER by ?object ?relation ?subject
""", "RDFStatement"
SELECT DISTINCT ?object ?relation ?subject WHERE {
?term a ?termType;
?inVocabulary ?vocabulary .
{
?term ?relation ?secondTerm .
?secondTerm a ?termType;
?inVocabulary ?secondVocabulary .
BIND(?term as ?object)
BIND(?secondTerm as ?subject)
} UNION {
?secondTerm ?relation ?term .
?secondTerm a ?termType;
?inVocabulary ?secondVocabulary .
BIND(?secondTerm as ?object)
BIND(?term as ?subject)
}
FILTER(?relation IN (?deniedRelations))
FILTER(?object != ?subject)
FILTER(?secondVocabulary != ?vocabulary)
} ORDER by ?object ?relation ?subject
""", "RDFStatement"
).setMaxResults(DEFAULT_PAGE_SIZE)
.setParameter("termType", termType)
.setParameter("inVocabulary", inVocabulary)
Expand All @@ -623,4 +627,32 @@ public List<RdfsStatement> getTermRelations(Vocabulary vocabulary) {
throw new PersistenceException(e);
}
}

/**
* Returns the list of all distinct languages (language tags) used by terms in the specified vocabulary.
*
* @param vocabularyUri Vocabulary identifier
* @return List of distinct languages
*/
public List<String> getLanguages(URI vocabularyUri) {
Objects.requireNonNull(vocabularyUri);
try {
return em.createNativeQuery("""
SELECT DISTINCT ?lang WHERE {
?x a ?type ;
?inVocabulary ?vocabulary ;
?labelProp ?label .
BIND (LANG(?label) as ?lang)
}
""", String.class)
.setParameter("type", URI.create(SKOS.CONCEPT))
.setParameter("inVocabulary",
URI.create(cz.cvut.kbss.termit.util.Vocabulary.s_p_je_pojmem_ze_slovniku))
.setParameter("vocabulary", vocabularyUri)
.setParameter("labelProp", URI.create(SKOS.PREF_LABEL))
.getResultList();
} catch (RuntimeException e) {
throw new PersistenceException(e);
}
}
}
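
A minimal caller sketch for the newly added `getLanguages` method. The DAO method and its signature are taken from the diff above; the wrapper class and the printing logic are hypothetical.

```java
import cz.cvut.kbss.termit.persistence.dao.VocabularyDao;

import java.net.URI;
import java.util.List;

class VocabularyLanguagesExample {

    private final VocabularyDao vocabularyDao;

    VocabularyLanguagesExample(VocabularyDao vocabularyDao) {
        this.vocabularyDao = vocabularyDao;
    }

    void printTermLanguages(URI vocabularyUri) {
        // Each entry is a language tag of a skos:prefLabel in the vocabulary, e.g. "cs" or "en"
        final List<String> languages = vocabularyDao.getLanguages(vocabularyUri);
        languages.forEach(System.out::println);
    }
}
```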
