From d7f0b58927c28f74040607a4d732c7e3f90c4f75 Mon Sep 17 00:00:00 2001 From: southeo Date: Tue, 5 Dec 2023 15:42:27 +0100 Subject: [PATCH 1/2] Copy batches into db --- .../configuration/BatchInserterConfig.java | 26 ++++ .../exception/DisscoRepositoryException.java | 4 - .../translator/repository/BatchInserter.java | 57 +++++++ .../translator/repository/DwcaRepository.java | 24 +-- .../core/translator/service/DwcaService.java | 17 +- .../repository/BaseRepositoryIT.java | 2 +- .../repository/BatchInserterTest.java | 145 ++++++++++++++++++ .../repository/DwcaRepositoryTest.java | 57 ++++++- .../translator/service/DwcaServiceTest.java | 21 ++- 9 files changed, 317 insertions(+), 36 deletions(-) create mode 100644 src/main/java/eu/dissco/core/translator/configuration/BatchInserterConfig.java create mode 100644 src/main/java/eu/dissco/core/translator/repository/BatchInserter.java create mode 100644 src/test/java/eu/dissco/core/translator/repository/BatchInserterTest.java diff --git a/src/main/java/eu/dissco/core/translator/configuration/BatchInserterConfig.java b/src/main/java/eu/dissco/core/translator/configuration/BatchInserterConfig.java new file mode 100644 index 0000000..a75951e --- /dev/null +++ b/src/main/java/eu/dissco/core/translator/configuration/BatchInserterConfig.java @@ -0,0 +1,26 @@ +package eu.dissco.core.translator.configuration; + +import java.sql.DriverManager; +import java.sql.SQLException; +import lombok.RequiredArgsConstructor; +import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; +import org.springframework.context.annotation.Configuration; +import org.postgresql.copy.CopyManager; +import org.postgresql.core.BaseConnection; +import org.springframework.context.annotation.Bean; + + +@Configuration +@RequiredArgsConstructor +public class BatchInserterConfig { + + private final DataSourceProperties properties; + + @Bean + public CopyManager copyManager() throws SQLException { + var connection = DriverManager.getConnection(properties.getUrl(), properties.getUsername(), + properties.getPassword()); + return new CopyManager((BaseConnection) connection); + } + +} diff --git a/src/main/java/eu/dissco/core/translator/exception/DisscoRepositoryException.java b/src/main/java/eu/dissco/core/translator/exception/DisscoRepositoryException.java index 05bee47..2515daf 100644 --- a/src/main/java/eu/dissco/core/translator/exception/DisscoRepositoryException.java +++ b/src/main/java/eu/dissco/core/translator/exception/DisscoRepositoryException.java @@ -2,10 +2,6 @@ public class DisscoRepositoryException extends Exception { - public DisscoRepositoryException(String message) { - super(message); - } - public DisscoRepositoryException(String message, Throwable cause) { super(message, cause); } diff --git a/src/main/java/eu/dissco/core/translator/repository/BatchInserter.java b/src/main/java/eu/dissco/core/translator/repository/BatchInserter.java new file mode 100644 index 0000000..09db4d6 --- /dev/null +++ b/src/main/java/eu/dissco/core/translator/repository/BatchInserter.java @@ -0,0 +1,57 @@ +package eu.dissco.core.translator.repository; + +import com.fasterxml.jackson.databind.JsonNode; +import eu.dissco.core.translator.exception.DisscoRepositoryException; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.sql.SQLException; +import java.util.List; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.tuple.Pair; +import org.postgresql.copy.CopyManager; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +@Slf4j +public class BatchInserter { + + private final CopyManager copyManager; + + public void batchCopy(String tableName, List> dbRecords) + throws DisscoRepositoryException { + try (var outputStream = new ByteArrayOutputStream()) { + for (var dbRecord : dbRecords) { + outputStream.write(getCsvRow(dbRecord)); + } + var inputStream = new ByteArrayInputStream(outputStream.toByteArray()); + copyManager.copyIn("COPY " + tableName + + " FROM stdin DELIMITER ','", inputStream); + } catch (IOException | SQLException e) { + throw new DisscoRepositoryException( + String.format("An error has occurred inserting %d records into temp table %s", + dbRecords.size(), tableName), e); + } + } + + private static byte[] getCsvRow(Pair dbRecord) { + return (dbRecord.getLeft() + "," + + cleanString(dbRecord.getRight()) + + "\n").getBytes(StandardCharsets.UTF_8); + } + + private static String cleanString(JsonNode jsonNode) { + if (jsonNode.isEmpty()) { + return "{}"; + } + var node = jsonNode.toString(); + node = node.replace("\\u0000", ""); + node = node.replace("\\", "\\\\"); + node = node.replace(",", "\\,"); + return node; + } + +} diff --git a/src/main/java/eu/dissco/core/translator/repository/DwcaRepository.java b/src/main/java/eu/dissco/core/translator/repository/DwcaRepository.java index 7eec52d..9e88e72 100644 --- a/src/main/java/eu/dissco/core/translator/repository/DwcaRepository.java +++ b/src/main/java/eu/dissco/core/translator/repository/DwcaRepository.java @@ -8,16 +8,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; +import eu.dissco.core.translator.exception.DisscoRepositoryException; import java.util.List; import java.util.Map; -import java.util.Objects; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.tuple.Pair; import org.jooq.DSLContext; import org.jooq.Field; import org.jooq.JSONB; -import org.jooq.Query; import org.jooq.Record; import org.jooq.Table; import org.jooq.impl.DSL; @@ -34,6 +33,7 @@ public class DwcaRepository { private final ObjectMapper mapper; private final DSLContext context; + private final BatchInserter batchInserter; public void createTable(String tableName) { context.createTable(tableName) @@ -43,26 +43,13 @@ public void createTable(String tableName) { context.createIndex().on(tableName, idField.getName()).execute(); } - private Table getTable(String tableName) { return DSL.table("\"" + tableName + "\""); } - public void postRecords(String tableName, List> dbRecords) { - var queries = dbRecords.stream().map(dbRecord -> recordToQuery(tableName, dbRecord)).filter( - Objects::nonNull).toList(); - context.batch(queries).execute(); - } - - private Query recordToQuery(String tableName, Pair dbRecord) { - try { - return context.insertInto(getTable(tableName)).set(idField, dbRecord.getLeft()) - .set(dataField, - JSONB.jsonb(mapper.writeValueAsString(dbRecord.getRight()).replace("\\u0000", ""))); - } catch (JsonProcessingException e) { - log.error("Unable to map JSON to JSONB, ignoring record: {}", dbRecord.getLeft(), e); - return null; - } + public void postRecords(String tableName, List> dbRecords) + throws DisscoRepositoryException { + batchInserter.batchCopy(tableName, dbRecords); } public Map getCoreRecords(List batch, String tableName) { @@ -89,6 +76,5 @@ public void deleteTable(String tableName) { context.dropTableIfExists(tableName).execute(); } - } diff --git a/src/main/java/eu/dissco/core/translator/service/DwcaService.java b/src/main/java/eu/dissco/core/translator/service/DwcaService.java index dff8dad..ee048f5 100644 --- a/src/main/java/eu/dissco/core/translator/service/DwcaService.java +++ b/src/main/java/eu/dissco/core/translator/service/DwcaService.java @@ -11,6 +11,7 @@ import eu.dissco.core.translator.domain.DigitalSpecimenWrapper; import eu.dissco.core.translator.domain.Enrichment; import eu.dissco.core.translator.exception.DiSSCoDataException; +import eu.dissco.core.translator.exception.DisscoRepositoryException; import eu.dissco.core.translator.exception.OrganisationException; import eu.dissco.core.translator.properties.DwcaProperties; import eu.dissco.core.translator.properties.EnrichmentProperties; @@ -101,6 +102,8 @@ public void retrieveData() { } catch (InterruptedException e) { log.error("Failed during downloading file due to interruption", e); Thread.currentThread().interrupt(); + } catch (DisscoRepositoryException e) { + log.error("Failed during batch copy into temp tables with exception", e); } finally { if (archive != null) { log.info("Cleaning up database tables"); @@ -329,14 +332,13 @@ private Collection> prepareChunks(List inputList, int chunk } - private List postArchiveToDatabase(Archive archive) { + private List postArchiveToDatabase(Archive archive) throws DisscoRepositoryException { var tableNames = generateTableNames(archive); createTempTables(tableNames); log.info("Created tables: {}", tableNames); var idList = postCore(archive.getCore()); postExtensions(archive.getExtensions(), idList); return idList; - } private void removeTempTables(Archive archive) { @@ -358,7 +360,8 @@ private List generateTableNames(Archive archive) { private String getTableName(ArchiveFile archiveFile) { var fullSourceSystemId = webClientProperties.getSourceSystemId(); var minifiedSourceSystemId = fullSourceSystemId.substring(fullSourceSystemId.indexOf('/') + 1); - return minifiedSourceSystemId + "_" + archiveFile.getRowType().prefixedName(); + minifiedSourceSystemId = minifiedSourceSystemId.replace("-", "_"); + return (minifiedSourceSystemId + "_" + archiveFile.getRowType().prefixedName()).toLowerCase().replace(":","_"); } private void createTempTables(List tableNames) { @@ -367,7 +370,7 @@ private void createTempTables(List tableNames) { } } - private ArrayList postCore(ArchiveFile core) { + private ArrayList postCore(ArchiveFile core) throws DisscoRepositoryException { var dbRecords = new ArrayList>(); var idList = new ArrayList(); for (var rec : core) { @@ -393,14 +396,14 @@ private ArrayList postCore(ArchiveFile core) { } private void postToDatabase(ArchiveFile archiveFile, - ArrayList> dbRecords) { + ArrayList> dbRecords) throws DisscoRepositoryException { log.info("Persisting {} records to database", dbRecords.size()); dwcaRepository.postRecords(getTableName(archiveFile), dbRecords); dbRecords.clear(); } - - private void postExtensions(Set extensions, List idsList) { + private void postExtensions(Set extensions, List idsList) + throws DisscoRepositoryException { var dbRecords = new ArrayList>(); for (var extension : extensions) { log.info("Processing records of extension: {}", extension.getRowType().toString()); diff --git a/src/test/java/eu/dissco/core/translator/repository/BaseRepositoryIT.java b/src/test/java/eu/dissco/core/translator/repository/BaseRepositoryIT.java index 92ba0f0..c594655 100644 --- a/src/test/java/eu/dissco/core/translator/repository/BaseRepositoryIT.java +++ b/src/test/java/eu/dissco/core/translator/repository/BaseRepositoryIT.java @@ -23,7 +23,7 @@ public class BaseRepositoryIT { @Container private static final PostgreSQLContainer CONTAINER = new PostgreSQLContainer<>(POSTGIS); protected DSLContext context; - private HikariDataSource dataSource; + protected HikariDataSource dataSource; @BeforeEach void prepareDatabase() { diff --git a/src/test/java/eu/dissco/core/translator/repository/BatchInserterTest.java b/src/test/java/eu/dissco/core/translator/repository/BatchInserterTest.java new file mode 100644 index 0000000..dfd88aa --- /dev/null +++ b/src/test/java/eu/dissco/core/translator/repository/BatchInserterTest.java @@ -0,0 +1,145 @@ +package eu.dissco.core.translator.repository; + +import static eu.dissco.core.translator.TestUtils.MAPPER; +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.databind.JsonNode; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; +import org.apache.commons.lang3.tuple.Pair; +import org.jooq.Field; +import org.jooq.JSONB; +import org.jooq.Record; +import org.jooq.Table; +import org.jooq.impl.DSL; +import org.jooq.impl.SQLDataType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.postgresql.copy.CopyManager; +import org.postgresql.core.BaseConnection; +import org.testcontainers.shaded.org.yaml.snakeyaml.events.Event.ID; + +class BatchInserterTest extends BaseRepositoryIT { + + private BatchInserter batchInserter; + private static final String TABLE_NAME = "xxx_xxx_xxx_core"; + private final Field ID_FIELD = DSL.field("dwcaid", String.class); + private static final Field DATA_FIELD = DSL.field("data", JSONB.class); + private static final String RECORD_ID = "11a8a4c6-3188-4305-9688-d68942f4038e"; + private static final String RECORD_ID_ALT = "32546f7b-f62a-4368-8c60-922f1cba4ab8"; + @BeforeEach + void setup() throws SQLException { + var connection = DriverManager.getConnection(dataSource.getJdbcUrl(), dataSource.getUsername(), + dataSource.getPassword()); + var copyManager = new CopyManager((BaseConnection) connection); + batchInserter = new BatchInserter(copyManager); + context.createTable(TABLE_NAME) + .column(ID_FIELD, SQLDataType.VARCHAR) + .column(DATA_FIELD, SQLDataType.JSONB) + .execute(); + context.createIndex().on(TABLE_NAME, ID_FIELD.getName()).execute(); + } + + @AfterEach + void destroy(){ + context.dropTableIfExists(getTable(TABLE_NAME)).execute(); + } + + @Test + void testBatchInsert() throws Exception { + // Given + var records = givenCoreRecords(); + var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); + + // When + batchInserter.batchCopy(TABLE_NAME, records); + var result = context.select(getTable(TABLE_NAME).asterisk()) + .from(getTable(TABLE_NAME)) + .where(idField.eq(RECORD_ID)) + .fetchOne(); + + // Then + assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(givenJsonNode()); + } + + @ParameterizedTest + @MethodSource("badStrings") + void testBadCharacters(String badString) throws Exception { + // Given + var node = MAPPER.createObjectNode(); + node.put("field", badString); + var pair = List.of(Pair.of(RECORD_ID, (JsonNode) node)); + var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); + + // When + batchInserter.batchCopy(TABLE_NAME, pair); + var result = context.select(getTable(TABLE_NAME).asterisk()) + .from(getTable(TABLE_NAME)) + .where(idField.eq(RECORD_ID)) + .fetchOne(); + + // Then + assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(node); + } + + @Test + void testBadCharacters() throws Exception { + // Given + var node = MAPPER.createObjectNode(); + node.put("field", "\u0000"); + var pair = List.of(Pair.of(RECORD_ID, (JsonNode) node)); + var expected = MAPPER.readTree(""" + { + "field":"" + } + """); + var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); + + // When + batchInserter.batchCopy(TABLE_NAME, pair); + var result = context.select(getTable(TABLE_NAME).asterisk()) + .from(getTable(TABLE_NAME)) + .where(idField.eq(RECORD_ID)) + .fetchOne(); + + // Then + assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(expected); + } + + private static Stream badStrings(){ + return Stream.of( + Arguments.of("bad \b string"), + Arguments.of("bad \f string"), + Arguments.of("bad \n string"), + Arguments.of("bad \r string"), + Arguments.of("bad \t string"), + Arguments.of("bad, string"), + Arguments.of("bad \\N string") + ); + } + + private List> givenCoreRecords() { + var records = new ArrayList>(); + records.add(Pair.of(RECORD_ID, givenJsonNode())); + records.add(Pair.of(RECORD_ID_ALT, MAPPER.createObjectNode())); + return records; + } + + private JsonNode givenJsonNode(){ + var node = MAPPER.createObjectNode(); + node.put("test", "test"); + node.put("data", "value"); + return node; + } + + private Table getTable(String tableName) { + return DSL.table("\"" + tableName + "\""); + } +} diff --git a/src/test/java/eu/dissco/core/translator/repository/DwcaRepositoryTest.java b/src/test/java/eu/dissco/core/translator/repository/DwcaRepositoryTest.java index 26cdb76..530eb68 100644 --- a/src/test/java/eu/dissco/core/translator/repository/DwcaRepositoryTest.java +++ b/src/test/java/eu/dissco/core/translator/repository/DwcaRepositoryTest.java @@ -5,7 +5,9 @@ import static java.util.stream.Collectors.mapping; import static java.util.stream.Collectors.toList; import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.BDDMockito.then; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import java.util.ArrayList; import java.util.List; @@ -13,12 +15,27 @@ import java.util.UUID; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; +import org.jooq.Field; +import org.jooq.JSONB; +import org.jooq.Query; +import org.jooq.Record; +import org.jooq.Table; +import org.jooq.impl.DSL; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +@ExtendWith(MockitoExtension.class) class DwcaRepositoryTest extends BaseRepositoryIT { private DwcaRepository repository; + @Mock + BatchInserter batchInserter; + + private final Field idField = DSL.field("dwcaid", String.class); + private final Field dataField = DSL.field("data", JSONB.class); private static JsonNode givenRecord(String corruptedValue) { var objectNode = MAPPER.createObjectNode(); @@ -29,7 +46,7 @@ private static JsonNode givenRecord(String corruptedValue) { @BeforeEach void setup() { - repository = new DwcaRepository(MAPPER, context); + repository = new DwcaRepository(MAPPER, context, batchInserter); } @Test @@ -38,7 +55,7 @@ void getCoreRecords() { var tableName = "XXX-XXX-XXX_Core"; var records = givenCoreRecords(); repository.createTable(tableName); - repository.postRecords(tableName, records); + postRecords(tableName, records); // When var results = repository.getCoreRecords(records.stream().map(Pair::getLeft).toList(), @@ -57,7 +74,7 @@ void getCorruptCoreRecords() { var records = List.of( Pair.of(UUID.randomUUID().toString(), givenRecord("\u0000 someCorruptedInformation"))); repository.createTable(tableName); - repository.postRecords(tableName, records); + postRecords(tableName, records); // When var results = repository.getCoreRecords(records.stream().map(Pair::getLeft).toList(), @@ -75,7 +92,7 @@ void getCoreExtensionRecords() { var tableName = "XXX-XXX-XXX_Extension"; var records = givenExtensionRecord(); repository.createTable(tableName); - repository.postRecords(tableName, records); + postRecords(tableName, records); // When var results = repository.getRecords(records.stream().map(Pair::getLeft).toList(), tableName); @@ -86,6 +103,19 @@ void getCoreExtensionRecords() { records.stream().collect(groupingBy(Pair::getLeft, mapping(Pair::getRight, toList())))); } + @Test + void testPostRecords() throws Exception { + // Given + var tableName = "XXX-XXX-XXX_Extension"; + var records = givenExtensionRecord(); + + // When + repository.postRecords(tableName, records); + + // Then + then(batchInserter).should().batchCopy(tableName, records); + } + private ArrayList> givenExtensionRecord() { var records = new ArrayList>(); for (int i = 0; i < 10; i++) { @@ -113,4 +143,23 @@ private List> givenCoreRecords() { return records; } + private void postRecords(String tableName, List> dbRecords) { + var queries = dbRecords.stream().map(dbRecord -> recordToQuery(tableName, dbRecord)).toList(); + context.batch(queries).execute(); + } + + private Query recordToQuery(String tableName, Pair dbRecord) { + try { + return context.insertInto(getTable(tableName)).set(idField, dbRecord.getLeft()) + .set(dataField, + JSONB.jsonb(MAPPER.writeValueAsString(dbRecord.getRight()).replace("\\u0000", ""))); + } catch (JsonProcessingException e) { + return null; + } + } + + private Table getTable(String tableName) { + return DSL.table("\"" + tableName + "\""); + } + } diff --git a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java index d12cf70..4976cd9 100644 --- a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java +++ b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java @@ -11,12 +11,14 @@ import static org.mockito.ArgumentMatchers.eq; import static org.mockito.BDDMockito.given; import static org.mockito.BDDMockito.then; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import eu.dissco.core.translator.TestUtils; import eu.dissco.core.translator.domain.DigitalSpecimenEvent; +import eu.dissco.core.translator.exception.DisscoRepositoryException; import eu.dissco.core.translator.properties.DwcaProperties; import eu.dissco.core.translator.properties.EnrichmentProperties; import eu.dissco.core.translator.properties.FdoProperties; @@ -125,6 +127,23 @@ void testRetrieveData() throws Exception { cleanup("src/test/resources/dwca/test/dwca-rbins.zip"); } + @Test + void testCopyTableFails() throws Exception { + // Given + givenDWCA("/dwca-rbins.zip"); + doThrow(new DisscoRepositoryException("", new Exception())).when(dwcaRepository) + .postRecords(eq("ABC-DDD-ASD_dwc:Occurrence"), anyList()); + + // When + service.retrieveData(); + + // Then + then(dwcaRepository).should(times(2)).deleteTable(any()); + then(dwcaRepository).should(times(2)).createTable(any()); + then(dwcaRepository).shouldHaveNoMoreInteractions(); + then(kafkaService).shouldHaveNoInteractions(); + } + @Test void testRetrieveDataEmlException() throws Exception { // Given @@ -364,7 +383,7 @@ private void givenDWCA(String file) { } @Test - void testRetrieveDataNull() throws IOException { + void testRetrieveDataNull() throws Exception { // Given givenDWCA("/dwca-lux-associated-media.zip"); var nullMap = new HashMap(); From 918c90162dc2d174f9229ddb4a59db4a8f2a3a22 Mon Sep 17 00:00:00 2001 From: southeo Date: Tue, 5 Dec 2023 16:22:13 +0100 Subject: [PATCH 2/2] table names --- .../dissco/core/translator/service/DwcaService.java | 10 +++++++--- .../core/translator/service/DwcaServiceTest.java | 11 +++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/main/java/eu/dissco/core/translator/service/DwcaService.java b/src/main/java/eu/dissco/core/translator/service/DwcaService.java index ee048f5..9a56b6d 100644 --- a/src/main/java/eu/dissco/core/translator/service/DwcaService.java +++ b/src/main/java/eu/dissco/core/translator/service/DwcaService.java @@ -359,9 +359,13 @@ private List generateTableNames(Archive archive) { private String getTableName(ArchiveFile archiveFile) { var fullSourceSystemId = webClientProperties.getSourceSystemId(); - var minifiedSourceSystemId = fullSourceSystemId.substring(fullSourceSystemId.indexOf('/') + 1); - minifiedSourceSystemId = minifiedSourceSystemId.replace("-", "_"); - return (minifiedSourceSystemId + "_" + archiveFile.getRowType().prefixedName()).toLowerCase().replace(":","_"); + var minifiedSourceSystemId = fullSourceSystemId.substring(fullSourceSystemId.indexOf('/') + 1) + .replace("-", "_"); + var tableName = (minifiedSourceSystemId + "_" + archiveFile.getRowType() + .prefixedName()).toLowerCase() + .replace(":", "_"); + tableName = tableName.replace("/", "_"); + return tableName.replace(".", "_"); } private void createTempTables(List tableNames) { diff --git a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java index 4976cd9..ee27014 100644 --- a/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java +++ b/src/test/java/eu/dissco/core/translator/service/DwcaServiceTest.java @@ -132,7 +132,7 @@ void testCopyTableFails() throws Exception { // Given givenDWCA("/dwca-rbins.zip"); doThrow(new DisscoRepositoryException("", new Exception())).when(dwcaRepository) - .postRecords(eq("ABC-DDD-ASD_dwc:Occurrence"), anyList()); + .postRecords(eq("abc_ddd_asd_dwc_occurrence"), anyList()); // When service.retrieveData(); @@ -229,9 +229,9 @@ void testRetrieveDataWithGbifMedia() throws Exception { // Given givenDWCA("/dwca-kew-gbif-media.zip"); given(dwcaRepository.getCoreRecords(anyList(), anyString())).willReturn(givenSpecimenMap(19)); - given(dwcaRepository.getRecords(anyList(), eq("ABC-DDD-ASD_dwc:Identification"))).willReturn( + given(dwcaRepository.getRecords(anyList(), eq("abc_ddd_asd_dwc_identification"))).willReturn( Map.of()); - given(dwcaRepository.getRecords(anyList(), eq("ABC-DDD-ASD_gbif:Multimedia"))).willReturn( + given(dwcaRepository.getRecords(anyList(), eq("abc_ddd_asd_gbif_multimedia"))).willReturn( givenImageMap(19)); given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean())) .willReturn(givenDigitalSpecimen()); @@ -267,8 +267,7 @@ void testRetrieveDataWithAcMedia() throws Exception { // Given givenDWCA("/dwca-naturalis-ac-media.zip"); given(dwcaRepository.getCoreRecords(anyList(), anyString())).willReturn(givenSpecimenMap(14)); - given(dwcaRepository.getRecords(anyList(), - eq("ABC-DDD-ASD_http://rs.tdwg.org/ac/terms/Multimedia"))).willReturn(givenImageMap(14)); + given(dwcaRepository.getRecords(anyList(), eq("abc_ddd_asd_http___rs_tdwg_org_ac_terms_multimedia"))).willReturn(givenImageMap(14)); given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean())) .willReturn(givenDigitalSpecimen()); given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class), @@ -293,7 +292,7 @@ void testRetrieveDataWithInvalidAcMedia() throws Exception { givenDWCA("/dwca-invalid-ac-media.zip"); given(dwcaRepository.getCoreRecords(anyList(), anyString())).willReturn(givenSpecimenMap(1)); given(dwcaRepository.getRecords(anyList(), - eq("ABC-DDD-ASD_http://rs.tdwg.org/ac/terms/Multimedia"))).willReturn(givenImageMap(1)); + eq("abc_ddd_asd_http___rs_tdwg_org_ac_terms_multimedia"))).willReturn(givenImageMap(1)); given(digitalSpecimenDirector.assembleDigitalSpecimenTerm(any(JsonNode.class), anyBoolean())) .willReturn(givenDigitalSpecimen()); given(digitalSpecimenDirector.assembleDigitalMediaObjects(anyBoolean(), any(JsonNode.class),