From 50f76e7edc3a009b77bc8c51b4e048b5e8b8ef03 Mon Sep 17 00:00:00 2001 From: Sam Leeflang Date: Wed, 14 Feb 2024 16:52:30 +0100 Subject: [PATCH] Make the queue configurable for kafka - Includes scientificNameHtmlLabel for identifications - Includes small update pom file - Includes ignore for new image issue --- .github/workflows/.trivyignore | 4 ++++ pom.xml | 7 +++---- .../core/translator/service/BioCaseService.java | 2 +- .../core/translator/service/DwcaService.java | 2 +- .../core/translator/service/KafkaService.java | 6 ++++-- .../resources/json-schema/identifications.json | 7 +++++++ .../translator/service/BioCaseServiceTest.java | 6 +++--- .../core/translator/service/DwcaServiceTest.java | 14 +++++++------- .../core/translator/service/KafkaServiceTest.java | 8 ++++++-- 9 files changed, 36 insertions(+), 20 deletions(-) diff --git a/.github/workflows/.trivyignore b/.github/workflows/.trivyignore index e69de29..e74d4f0 100644 --- a/.github/workflows/.trivyignore +++ b/.github/workflows/.trivyignore @@ -0,0 +1,4 @@ +# Date: Feb 12, 2024 +# Notes: Issue with libexpat, parsing large tokens can trigger a denial of service +# Needs to be fixed in Docker Image. 
+CVE-2023-52425 \ No newline at end of file diff --git a/pom.xml b/pom.xml index 8fc2f3e..b0fab03 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.springframework.boot spring-boot-starter-parent - 3.2.0 + 3.2.2 eu.dissco.core @@ -21,9 +21,8 @@ 3.1.0 2.1.2 4.0.1 - 1.4.12 5.2.0 - 1.19.0 + 1.19.5 dissco https://sonarcloud.io ../app-it/target/site/jacoco-aggregate/jacoco.xml @@ -88,7 +87,7 @@ dwca-io ${dwca-io.version} - + commons-beanutils commons-beanutils diff --git a/src/main/java/eu/dissco/core/translator/service/BioCaseService.java b/src/main/java/eu/dissco/core/translator/service/BioCaseService.java index 1a3a214..f9e7885 100644 --- a/src/main/java/eu/dissco/core/translator/service/BioCaseService.java +++ b/src/main/java/eu/dissco/core/translator/service/BioCaseService.java @@ -190,7 +190,7 @@ private void processUnit(DataSet dataset, Unit unit) attributes.getOdsNormalisedPhysicalSpecimenId(), unit, attributes.getDwcInstitutionId()); log.debug("Result digital Specimen: {}", digitalSpecimen); - kafkaService.sendMessage("digital-specimen", + kafkaService.sendMessage( new DigitalSpecimenEvent( enrichmentServices(false), digitalSpecimen, diff --git a/src/main/java/eu/dissco/core/translator/service/DwcaService.java b/src/main/java/eu/dissco/core/translator/service/DwcaService.java index 66425cb..6f00da6 100644 --- a/src/main/java/eu/dissco/core/translator/service/DwcaService.java +++ b/src/main/java/eu/dissco/core/translator/service/DwcaService.java @@ -159,7 +159,7 @@ private void processDigitalSpecimen(Collection fullRecords, log.debug("Digital Specimen: {}", digitalObjects); var translatorEvent = new DigitalSpecimenEvent(enrichmentServices(false), digitalObjects.getLeft(), digitalObjects.getRight()); - kafkaService.sendMessage("digital-specimen", translatorEvent); + kafkaService.sendMessage(translatorEvent); } catch (DiSSCoDataException e) { log.error("Encountered data issue with record: {}", fullRecord, e); } diff --git 
a/src/main/java/eu/dissco/core/translator/service/KafkaService.java b/src/main/java/eu/dissco/core/translator/service/KafkaService.java index 780e0a0..d5fb6b9 100644 --- a/src/main/java/eu/dissco/core/translator/service/KafkaService.java +++ b/src/main/java/eu/dissco/core/translator/service/KafkaService.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dissco.core.translator.domain.DigitalSpecimenEvent; +import eu.dissco.core.translator.properties.KafkaProperties; import java.util.concurrent.CompletableFuture; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -16,10 +17,11 @@ public class KafkaService { private final KafkaTemplate<String, String> kafkaTemplate; + private final KafkaProperties properties; private final ObjectMapper mapper; - public void sendMessage(String topic, DigitalSpecimenEvent event) throws JsonProcessingException { - CompletableFuture<SendResult<String, String>> future = kafkaTemplate.send(topic, + public void sendMessage(DigitalSpecimenEvent event) throws JsonProcessingException { + CompletableFuture<SendResult<String, String>> future = kafkaTemplate.send(properties.getTopic(), mapper.writeValueAsString(event)); future.whenComplete((result, ex) -> { if (ex != null) { diff --git a/src/main/resources/json-schema/identifications.json b/src/main/resources/json-schema/identifications.json index 216d898..9221b0b 100644 --- a/src/main/resources/json-schema/identifications.json +++ b/src/main/resources/json-schema/identifications.json @@ -117,6 +117,13 @@ "Roptrocerus typographi (Györfi, 1952)" ] }, + "ods:scientificNameHtmlLabel": { + "type": "string", + "description": "A Hyper Text Markup Language (HTML) representation of the scientific name. Includes correct formatting of the name.", + "examples": [ + "<i>Absidia ginsan</i> Komin. 
et al., 1952" + ] + }, "dwc:scientificNameAuthorship": { "type": "string", "description": "https://rs.tdwg.org/dwc/terms/scientificNameAuthorship", diff --git a/src/test/java/eu/dissco/core/translator/service/BioCaseServiceTest.java b/src/test/java/eu/dissco/core/translator/service/BioCaseServiceTest.java index 4a8f12e..2e25b9a 100644 --- a/src/test/java/eu/dissco/core/translator/service/BioCaseServiceTest.java +++ b/src/test/java/eu/dissco/core/translator/service/BioCaseServiceTest.java @@ -98,7 +98,7 @@ void testRetrieveData206() throws Exception { // Then then(webClient).should(times(2)).get(); - then(kafkaService).should(times(99)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(99)).sendMessage(any( DigitalSpecimenEvent.class)); } @@ -123,7 +123,7 @@ void testRetrieveDataWithMedia206() throws Exception { // Then then(webClient).should(times(1)).get(); - then(kafkaService).should(times(100)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(100)).sendMessage( any( DigitalSpecimenEvent.class)); } @@ -147,7 +147,7 @@ void testRetrieveDataInvalidMedia() throws Exception { // Then var captor = ArgumentCaptor.forClass(DigitalSpecimenEvent.class); then(webClient).should(times(1)).get(); - then(kafkaService).should(times(1)).sendMessage(eq("digital-specimen"), captor.capture()); + then(kafkaService).should(times(1)).sendMessage(captor.capture()); assertThat(captor.getValue().digitalMediaObjectEvents()).isEmpty(); } diff --git a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java index 14d61ee..e31c591 100644 --- a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java +++ b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java @@ -118,7 +118,7 @@ void testRetrieveData() throws Exception { // Then then(dwcaRepository).should(times(2)).createTable(anyString()); 
then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(9)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(9)).sendMessage(any( DigitalSpecimenEvent.class)); assertThat(captor.getValue().get("eml:license").asText()).isEqualTo( "http://creativecommons.org/licenses/by-nc/4.0/legalcode"); @@ -160,7 +160,7 @@ void testRetrieveDataEmlException() throws Exception { // Then then(dwcaRepository).should(times(2)).createTable(anyString()); then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(9)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(9)).sendMessage( any( DigitalSpecimenEvent.class)); assertThat(captor.getValue().get("eml:license")).isNull(); assertThat(captor.getValue().get("eml:title")).isNull(); @@ -183,7 +183,7 @@ void testRetrieveDataWithLicenseText() throws Exception { // Then then(dwcaRepository).should(times(2)).createTable(anyString()); then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(9)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(9)).sendMessage(any( DigitalSpecimenEvent.class)); assertThat(captor.getValue().get("eml:license").asText()).isEqualTo( "Creative Commons Attribution Non Commercial (CC-BY-NC) 4.0 License"); @@ -246,7 +246,7 @@ void testRetrieveDataWithGbifMedia() throws Exception { // Then then(dwcaRepository).should(times(3)).createTable(anyString()); then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(19)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(19)).sendMessage(any( DigitalSpecimenEvent.class)); cleanup("src/test/resources/dwca/test/dwca-kew-gbif-media.zip"); } @@ -282,7 +282,7 @@ void testRetrieveDataWithAcMedia() throws Exception { // Then 
then(dwcaRepository).should(times(2)).createTable(anyString()); then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(14)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(14)).sendMessage(any( DigitalSpecimenEvent.class)); cleanup("src/test/resources/dwca/test/dwca-naturalis-ac-media.zip"); } @@ -307,7 +307,7 @@ void testRetrieveDataWithInvalidAcMedia() throws Exception { var captor = ArgumentCaptor.forClass(DigitalSpecimenEvent.class); then(dwcaRepository).should(times(2)).createTable(anyString()); then(dwcaRepository).should(times(2)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(1)).sendMessage(eq("digital-specimen"), captor.capture()); + then(kafkaService).should(times(1)).sendMessage(captor.capture()); assertThat(captor.getValue().digitalMediaObjectEvents()).isEmpty(); cleanup("src/test/resources/dwca/test/dwca-invalid-ac-media.zip"); } @@ -346,7 +346,7 @@ void testRetrieveDataWithAssociatedMedia() throws Exception { // Then then(dwcaRepository).should(times(1)).createTable(anyString()); then(dwcaRepository).should(times(1)).postRecords(anyString(), anyList()); - then(kafkaService).should(times(20)).sendMessage(eq("digital-specimen"), any( + then(kafkaService).should(times(20)).sendMessage(any( DigitalSpecimenEvent.class)); cleanup("src/test/resources/dwca/test/dwca-lux-associated-media.zip"); } diff --git a/src/test/java/eu/dissco/core/translator/service/KafkaServiceTest.java b/src/test/java/eu/dissco/core/translator/service/KafkaServiceTest.java index 8097dbf..5c5b4ad 100644 --- a/src/test/java/eu/dissco/core/translator/service/KafkaServiceTest.java +++ b/src/test/java/eu/dissco/core/translator/service/KafkaServiceTest.java @@ -13,6 +13,7 @@ import eu.dissco.core.translator.domain.DigitalMediaObjectEvent; import eu.dissco.core.translator.domain.DigitalSpecimenEvent; import eu.dissco.core.translator.domain.DigitalSpecimenWrapper; +import 
eu.dissco.core.translator.properties.KafkaProperties; import java.util.List; import java.util.concurrent.CompletableFuture; import org.junit.jupiter.api.BeforeEach; @@ -30,11 +31,13 @@ class KafkaServiceTest { private KafkaTemplate kafkaTemplate; @Mock private SendResult sendResult; + @Mock + private KafkaProperties properties; private KafkaService service; @BeforeEach void setup() { - this.service = new KafkaService(kafkaTemplate, MAPPER); + this.service = new KafkaService(kafkaTemplate, properties, MAPPER); } @Test @@ -42,10 +45,11 @@ void testSendMessage() throws JsonProcessingException { // Given var x = CompletableFuture.completedFuture(sendResult); given(kafkaTemplate.send(anyString(), anyString())).willReturn(x); + given(properties.getTopic()).willReturn("test-topic"); var digitalSpecimenEvent = givenDigitalSpecimenEvent(); // When - service.sendMessage("test-topic", digitalSpecimenEvent); + service.sendMessage(digitalSpecimenEvent); // Then then(kafkaTemplate).should()