-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #50 from DiSSCo/feature/pgcopy-temp-tables
Copy batches into db
- Loading branch information
Showing
9 changed files
with
326 additions
and
42 deletions.
There are no files selected for viewing
26 changes: 26 additions & 0 deletions
26
src/main/java/eu/dissco/core/translator/configuration/BatchInserterConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package eu.dissco.core.translator.configuration; | ||
|
||
import java.sql.DriverManager; | ||
import java.sql.SQLException; | ||
import lombok.RequiredArgsConstructor; | ||
import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; | ||
import org.springframework.context.annotation.Configuration; | ||
import org.postgresql.copy.CopyManager; | ||
import org.postgresql.core.BaseConnection; | ||
import org.springframework.context.annotation.Bean; | ||
|
||
|
||
@Configuration | ||
@RequiredArgsConstructor | ||
public class BatchInserterConfig { | ||
|
||
private final DataSourceProperties properties; | ||
|
||
@Bean | ||
public CopyManager copyManager() throws SQLException { | ||
var connection = DriverManager.getConnection(properties.getUrl(), properties.getUsername(), | ||
properties.getPassword()); | ||
return new CopyManager((BaseConnection) connection); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
src/main/java/eu/dissco/core/translator/repository/BatchInserter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
package eu.dissco.core.translator.repository; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import eu.dissco.core.translator.exception.DisscoRepositoryException; | ||
import java.io.ByteArrayInputStream; | ||
import java.io.ByteArrayOutputStream; | ||
import java.io.IOException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.sql.SQLException; | ||
import java.util.List; | ||
import lombok.RequiredArgsConstructor; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.postgresql.copy.CopyManager; | ||
import org.springframework.stereotype.Component; | ||
|
||
@Component | ||
@RequiredArgsConstructor | ||
@Slf4j | ||
public class BatchInserter { | ||
|
||
private final CopyManager copyManager; | ||
|
||
public void batchCopy(String tableName, List<Pair<String, JsonNode>> dbRecords) | ||
throws DisscoRepositoryException { | ||
try (var outputStream = new ByteArrayOutputStream()) { | ||
for (var dbRecord : dbRecords) { | ||
outputStream.write(getCsvRow(dbRecord)); | ||
} | ||
var inputStream = new ByteArrayInputStream(outputStream.toByteArray()); | ||
copyManager.copyIn("COPY " + tableName | ||
+ " FROM stdin DELIMITER ','", inputStream); | ||
} catch (IOException | SQLException e) { | ||
throw new DisscoRepositoryException( | ||
String.format("An error has occurred inserting %d records into temp table %s", | ||
dbRecords.size(), tableName), e); | ||
} | ||
} | ||
|
||
private static byte[] getCsvRow(Pair<String, JsonNode> dbRecord) { | ||
return (dbRecord.getLeft() + "," + | ||
cleanString(dbRecord.getRight()) | ||
+ "\n").getBytes(StandardCharsets.UTF_8); | ||
} | ||
|
||
private static String cleanString(JsonNode jsonNode) { | ||
if (jsonNode.isEmpty()) { | ||
return "{}"; | ||
} | ||
var node = jsonNode.toString(); | ||
node = node.replace("\\u0000", ""); | ||
node = node.replace("\\", "\\\\"); | ||
node = node.replace(",", "\\,"); | ||
return node; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
145 changes: 145 additions & 0 deletions
145
src/test/java/eu/dissco/core/translator/repository/BatchInserterTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
package eu.dissco.core.translator.repository; | ||
|
||
import static eu.dissco.core.translator.TestUtils.MAPPER; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import java.sql.DriverManager; | ||
import java.sql.SQLException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.jooq.Field; | ||
import org.jooq.JSONB; | ||
import org.jooq.Record; | ||
import org.jooq.Table; | ||
import org.jooq.impl.DSL; | ||
import org.jooq.impl.SQLDataType; | ||
import org.junit.jupiter.api.AfterEach; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.Arguments; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
import org.postgresql.copy.CopyManager; | ||
import org.postgresql.core.BaseConnection; | ||
import org.testcontainers.shaded.org.yaml.snakeyaml.events.Event.ID; | ||
|
||
class BatchInserterTest extends BaseRepositoryIT { | ||
|
||
private BatchInserter batchInserter; | ||
private static final String TABLE_NAME = "xxx_xxx_xxx_core"; | ||
private final Field<String> ID_FIELD = DSL.field("dwcaid", String.class); | ||
private static final Field<JSONB> DATA_FIELD = DSL.field("data", JSONB.class); | ||
private static final String RECORD_ID = "11a8a4c6-3188-4305-9688-d68942f4038e"; | ||
private static final String RECORD_ID_ALT = "32546f7b-f62a-4368-8c60-922f1cba4ab8"; | ||
@BeforeEach | ||
void setup() throws SQLException { | ||
var connection = DriverManager.getConnection(dataSource.getJdbcUrl(), dataSource.getUsername(), | ||
dataSource.getPassword()); | ||
var copyManager = new CopyManager((BaseConnection) connection); | ||
batchInserter = new BatchInserter(copyManager); | ||
context.createTable(TABLE_NAME) | ||
.column(ID_FIELD, SQLDataType.VARCHAR) | ||
.column(DATA_FIELD, SQLDataType.JSONB) | ||
.execute(); | ||
context.createIndex().on(TABLE_NAME, ID_FIELD.getName()).execute(); | ||
} | ||
|
||
@AfterEach | ||
void destroy(){ | ||
context.dropTableIfExists(getTable(TABLE_NAME)).execute(); | ||
} | ||
|
||
@Test | ||
void testBatchInsert() throws Exception { | ||
// Given | ||
var records = givenCoreRecords(); | ||
var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); | ||
|
||
// When | ||
batchInserter.batchCopy(TABLE_NAME, records); | ||
var result = context.select(getTable(TABLE_NAME).asterisk()) | ||
.from(getTable(TABLE_NAME)) | ||
.where(idField.eq(RECORD_ID)) | ||
.fetchOne(); | ||
|
||
// Then | ||
assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(givenJsonNode()); | ||
} | ||
|
||
@ParameterizedTest | ||
@MethodSource("badStrings") | ||
void testBadCharacters(String badString) throws Exception { | ||
// Given | ||
var node = MAPPER.createObjectNode(); | ||
node.put("field", badString); | ||
var pair = List.of(Pair.of(RECORD_ID, (JsonNode) node)); | ||
var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); | ||
|
||
// When | ||
batchInserter.batchCopy(TABLE_NAME, pair); | ||
var result = context.select(getTable(TABLE_NAME).asterisk()) | ||
.from(getTable(TABLE_NAME)) | ||
.where(idField.eq(RECORD_ID)) | ||
.fetchOne(); | ||
|
||
// Then | ||
assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(node); | ||
} | ||
|
||
@Test | ||
void testBadCharacters() throws Exception { | ||
// Given | ||
var node = MAPPER.createObjectNode(); | ||
node.put("field", "\u0000"); | ||
var pair = List.of(Pair.of(RECORD_ID, (JsonNode) node)); | ||
var expected = MAPPER.readTree(""" | ||
{ | ||
"field":"" | ||
} | ||
"""); | ||
var idField = context.meta().getTables(TABLE_NAME).get(0).field(ID_FIELD); | ||
|
||
// When | ||
batchInserter.batchCopy(TABLE_NAME, pair); | ||
var result = context.select(getTable(TABLE_NAME).asterisk()) | ||
.from(getTable(TABLE_NAME)) | ||
.where(idField.eq(RECORD_ID)) | ||
.fetchOne(); | ||
|
||
// Then | ||
assertThat(MAPPER.readTree(result.get(DATA_FIELD).data())).isEqualTo(expected); | ||
} | ||
|
||
private static Stream<Arguments> badStrings(){ | ||
return Stream.of( | ||
Arguments.of("bad \b string"), | ||
Arguments.of("bad \f string"), | ||
Arguments.of("bad \n string"), | ||
Arguments.of("bad \r string"), | ||
Arguments.of("bad \t string"), | ||
Arguments.of("bad, string"), | ||
Arguments.of("bad \\N string") | ||
); | ||
} | ||
|
||
private List<Pair<String, JsonNode>> givenCoreRecords() { | ||
var records = new ArrayList<Pair<String, JsonNode>>(); | ||
records.add(Pair.of(RECORD_ID, givenJsonNode())); | ||
records.add(Pair.of(RECORD_ID_ALT, MAPPER.createObjectNode())); | ||
return records; | ||
} | ||
|
||
private JsonNode givenJsonNode(){ | ||
var node = MAPPER.createObjectNode(); | ||
node.put("test", "test"); | ||
node.put("data", "value"); | ||
return node; | ||
} | ||
|
||
private Table<Record> getTable(String tableName) { | ||
return DSL.table("\"" + tableName + "\""); | ||
} | ||
} |
Oops, something went wrong.