diff --git a/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java b/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java index e7a0c7a..117cf8b 100644 --- a/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java +++ b/src/main/java/eu/dissco/core/translator/terms/BaseDigitalObjectDirector.java @@ -16,6 +16,7 @@ import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_PERSON; import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent; import static eu.dissco.core.translator.terms.utils.EntityRelationshipUtils.addEntityRelationship; +import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -389,11 +390,7 @@ private List assembleIdentifiers(Js var identifiers = new ArrayList(); for (String identifierTerm : identifierTerms) { if (data.get(identifierTerm) != null) { - var identifier = new Identifier() - .withId(data.get(identifierTerm).asText()) - .withType("ods:Identifier") - .withDctermsTitle(identifierTerm) - .withDctermsIdentifier(data.get(identifierTerm).asText()); + var identifier = addIdentifier(data.get(identifierTerm).asText(), identifierTerm); identifiers.add(identifier); } } diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java index 44b6081..01d08e4 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/AgentsUtils.java @@ -1,8 +1,12 @@ package eu.dissco.core.translator.terms.utils; +import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier; + import eu.dissco.core.translator.domain.AgentRoleType; import eu.dissco.core.translator.schema.Agent; import eu.dissco.core.translator.schema.Agent.Type; +import 
eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus; +import eu.dissco.core.translator.schema.OdsHasRole; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -17,6 +21,11 @@ private AgentsUtils() { public static List addAgent(List currentAgents, String agentValue, String agentId, AgentRoleType role, Type type) { + return addAgent(currentAgents, agentValue, agentId, role, type, null); + } + + public static List addAgent(List currentAgents, String agentValue, String agentId, + AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) { var agents = new ArrayList(); if (currentAgents != null) { agents.addAll(currentAgents); @@ -24,16 +33,16 @@ public static List addAgent(List currentAgents, String agentValue, if (agentValue != null || agentId != null) { if ((agentValue != null && (agentValue.contains("&") || agentValue.contains("|"))) || ( agentId != null && (agentId.contains("&") || agentId.contains("|")))) { - handleMultipleAgents(agents, agentValue, agentId, role, type); + handleMultipleAgents(agents, agentValue, agentId, role, type, identifierStatus); } else { - constructAgent(agents, agentValue, agentId, role, type); + constructAgent(agents, agentValue, agentId, role, type, identifierStatus); } } return agents; } private static void constructAgent(List agents, String agentValue, String agentId, - AgentRoleType role, Type type) { + AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) { String agentName = agentValue; if (agentValue != null && agentValue.contains("http") && agentId == null) { agentId = agentValue; @@ -50,19 +59,17 @@ private static void constructAgent(List agents, String agentValue, String .withSchemaName(agentName) .withSchemaIdentifier(agentId) .withOdsHasRoles( - List.of(new eu.dissco.core.translator.schema.OdsHasRole().withType("schema:Role") + List.of(new OdsHasRole().withType("schema:Role") .withSchemaRoleName(role.getName()))); if (agentId != null) { - 
agent.withOdsHasIdentifiers(List.of( - new eu.dissco.core.translator.schema.Identifier().withId(agentId) - .withType("ods:Identifier") - .withDctermsIdentifier(agentId))); + agent.withOdsHasIdentifiers(List.of(addIdentifier(agentId, null, identifierStatus))); } agents.add(agent); } private static void handleMultipleAgents( - List agents, String agentValue, String agentId, AgentRoleType role, Type type) { + List agents, String agentValue, String agentId, AgentRoleType role, Type type, + OdsIdentifierStatus identifierStatus) { var ids = new String[0]; var agentValues = new String[0]; if (needsParsing(agentValue)) { @@ -74,7 +81,7 @@ private static void handleMultipleAgents( } if (agentValues.length == ids.length) { for (int i = 0; i < agentValues.length; i++) { - constructAgent(agents, agentValues[i], ids[i], role, type); + constructAgent(agents, agentValues[i], ids[i], role, type, identifierStatus); } } else if (agentValues.length > ids.length) { if (ids.length != 0) { @@ -83,7 +90,7 @@ private static void handleMultipleAgents( agentValue, agentId); } for (String agent : agentValues) { - constructAgent(agents, agent, null, role, type); + constructAgent(agents, agent, null, role, type, identifierStatus); } } else { if (agentValues.length != 0) { @@ -92,7 +99,7 @@ private static void handleMultipleAgents( agentValue, agentId); } for (String idValue : ids) { - constructAgent(agents, null, idValue, role, type); + constructAgent(agents, null, idValue, role, type, identifierStatus); } } } diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java index 0cb5d9a..8ad9a4b 100644 --- a/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java +++ b/src/main/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtils.java @@ -2,6 +2,7 @@ import static eu.dissco.core.translator.domain.AgentRoleType.DATA_TRANSLATOR; import static 
eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
+import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
 import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;
 
 import eu.dissco.core.translator.domain.RelationshipType;
@@ -33,7 +34,7 @@ public static EntityRelationship addEntityRelationship(
         .withDwcRelationshipEstablishedDate(Date.from(Instant.now()));
     entityRelationship.setOdsHasAgents(
         addAgent(entityRelationship.getOdsHasAgents(), agentName, agentId, DATA_TRANSLATOR,
-            SCHEMA_SOFTWARE_APPLICATION));
+            SCHEMA_SOFTWARE_APPLICATION, PREFERRED));
     if (relatedResource.startsWith("http")) {
       try {
         entityRelationship.setOdsRelatedResourceURI(new URI(relatedResource));
diff --git a/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java
new file mode 100644
index 0000000..f69489b
--- /dev/null
+++ b/src/main/java/eu/dissco/core/translator/terms/utils/IdentifierUtils.java
@@ -0,0 +1,132 @@
+package eu.dissco.core.translator.terms.utils;
+
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.DOI;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.UUID;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT;
+import static java.util.regex.Pattern.compile;
+
+import eu.dissco.core.translator.schema.Identifier;
+import eu.dissco.core.translator.schema.Identifier.DctermsType;
+import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel;
+import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.lang3.tuple.Triple;
+
+/**
+ * Builds {@link Identifier} objects and classifies them by matching the identifier string against
+ * a fixed, ordered list of known identifier patterns.
+ */
+@Slf4j
+public class IdentifierUtils {
+
+  /**
+   * Known identifier patterns in matching order. The first entry with a matching pattern wins, so
+   * the specific resolvers must stay ahead of the generic URL pattern.
+   */
+  private static final Map<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>> PATTERN_MAP =
+      patternMap();
+
+  private IdentifierUtils() {
+    // This is a Utility class
+  }
+
+  private static Map<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>> patternMap() {
+    var linkedMap = new LinkedHashMap<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>>();
+    // Hosts are matched literally (escaped dots, trailing slash) so look-alike domains fall through
+    linkedMap.put(List.of(compile("^https?://(?:dx\\.)?doi\\.org/")),
+        Triple.of(DOI, "DOI", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT));
+    linkedMap.put(List.of(compile("^https?://hdl\\.handle\\.net/")),
+        Triple.of(HANDLE, "Handle", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT));
+    linkedMap.put(List.of(compile("^https?://www\\.wikidata\\.org/")),
+        Triple.of(URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
+    linkedMap.put(List.of(compile("^https?://orcid\\.org/")),
+        Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
+    // Any resolver host, including multi-label ones, and both "ark:/" and "ark:" forms
+    linkedMap.put(List.of(compile("^https?://[\\w.-]+/ark:/?\\w+/\\w+")),
+        Triple.of(ARK, "ARK", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
+    linkedMap.put(List.of(compile("^https?://purl\\.org/")),
+        Triple.of(PURL, "PURL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
+    // Require "://" so values that merely begin with "http" are not classed as URLs
+    linkedMap.put(List.of(compile("^https?://")),
+        Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
+    // Whole-value match only, so a local id that merely embeds a UUID is not classed as UUID
+    linkedMap.put(List.of(compile(
+        "^(?:urn:)?(?:uuid:)?[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$")),
+        Triple.of(UUID, "UUID", GLOBALLY_UNIQUE_STABLE));
+    return linkedMap;
+  }
+
+  /**
+   * Creates an identifier without an explicit title or status.
+   *
+   * @param identifierString the raw identifier value, may be null
+   * @return the classified identifier, or null when identifierString is null
+   */
+  public static Identifier addIdentifier(String identifierString) {
+    return addIdentifier(identifierString, null, null);
+  }
+
+  /**
+   * Creates an identifier without an explicit status.
+   *
+   * @param identifierString the raw identifier value, may be null
+   * @param identifierName title for the identifier; when null the matched pattern's label is used
+   * @return the classified identifier, or null when identifierString is null
+   */
+  public static Identifier addIdentifier(String identifierString, String identifierName) {
+    return addIdentifier(identifierString, identifierName, null);
+  }
+
+  /**
+   * Creates an identifier and sets its type, title and GUPRI level from the first matching entry
+   * of the pattern map. Values that match no pattern are treated as locally unique identifiers.
+   *
+   * @param identifierString the raw identifier value, may be null
+   * @param identifierName title for the identifier; when null the matched pattern's label is used
+   * @param identifierStatus status to record on the identifier, may be null
+   * @return the classified identifier, or null when identifierString is null
+   */
+  public static Identifier addIdentifier(String identifierString, String identifierName,
+      OdsIdentifierStatus identifierStatus) {
+    if (identifierString == null) {
+      return null;
+    }
+    var identifier = new Identifier()
+        .withId(identifierString)
+        .withType("ods:Identifier")
+        .withDctermsIdentifier(identifierString)
+        .withOdsIdentifierStatus(identifierStatus);
+    for (var entry : PATTERN_MAP.entrySet()) {
+      for (var prefix : entry.getKey()) {
+        if (prefix.matcher(identifierString).find()) {
+          identifier.setDctermsType(entry.getValue().getLeft());
+          identifier.setDctermsTitle(getDcTermsTitle(identifierName, entry.getValue().getMiddle()));
+          identifier.setOdsGupriLevel(entry.getValue().getRight());
+          return identifier;
+        }
+      }
+    }
+    log.debug(
+        "Unable to recognise the type of identifier: {}. Assuming locally unique identifier",
+        identifierString);
+    identifier.setDctermsType(DctermsType.LOCALLY_UNIQUE_IDENTIFIER);
+    identifier.setDctermsTitle(identifierName);
+    identifier.setOdsGupriLevel(OdsGupriLevel.LOCALLY_UNIQUE_STABLE);
+    return identifier;
+  }
+
+  /** Returns identifierName when present, otherwise the default label of the matched pattern. */
+  private static String getDcTermsTitle(String identifierName, String defaultValue) {
+    return identifierName != null ? identifierName : defaultValue;
+  }
+}
diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java
index 63a21fb..f6acb35 100644
--- a/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java
+++ b/src/test/java/eu/dissco/core/translator/terms/utils/AgentUtilsTest.java
@@ -72,8 +72,7 @@ private static Agent createAgent(String name, String id) {
                 .withSchemaRoleName(CREATOR.getName())));
     if (id != null) {
       agent.withOdsHasIdentifiers(
-          List.of(new eu.dissco.core.translator.schema.Identifier().withType("ods:Identifier")
-              .withId(id).withDctermsIdentifier(id)));
+          List.of(IdentifierUtils.addIdentifier(id)));
     }
     return agent;
   }
diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java
index c5a2c59..1fec948 100644
--- 
a/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java
+++ b/src/test/java/eu/dissco/core/translator/terms/utils/EntityRelationshipUtilsTest.java
@@ -4,7 +4,7 @@
 import static eu.dissco.core.translator.domain.RelationshipType.HAS_FDO_TYPE;
 import static eu.dissco.core.translator.domain.RelationshipType.HAS_ORGANISATION_ID;
 import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
-import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;
+import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
 import static org.assertj.core.api.Assertions.assertThat;
 
 import eu.dissco.core.translator.domain.RelationshipType;
@@ -74,8 +74,8 @@ private EntityRelationship createEntityRelationship(RelationshipType relationshi
         .withDwcRelatedResourceID(relatedResource)
         .withOdsRelatedResourceURI(relatedResourceURI)
         .withDwcRelationshipEstablishedDate(Date.from(Instant.now()))
-        .withOdsHasAgents(addAgent(List.of(), APP_NAME, APP_PID,
-            DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION));
+        .withOdsHasAgents(AgentsUtils.addAgent(List.of(), APP_NAME, APP_PID,
+            DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION, PREFERRED));
   }
 
 }
diff --git a/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java
new file mode 100644
index 0000000..ce4b3f8
--- /dev/null
+++ b/src/test/java/eu/dissco/core/translator/terms/utils/IdentifierUtilsTest.java
@@ -0,0 +1,97 @@
+package eu.dissco.core.translator.terms.utils;
+
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.DOI;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.LOCALLY_UNIQUE_IDENTIFIER;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL;
+import static eu.dissco.core.translator.schema.Identifier.DctermsType.UUID;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT;
+import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.LOCALLY_UNIQUE_STABLE;
+import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
+import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import eu.dissco.core.translator.schema.Identifier;
+import eu.dissco.core.translator.schema.Identifier.DctermsType;
+import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel;
+import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus;
+import java.util.stream.Stream;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Checks that {@link IdentifierUtils#addIdentifier(String, String, OdsIdentifierStatus)} classifies
+ * each supported identifier form and carries over the supplied title and status.
+ */
+class IdentifierUtilsTest {
+
+  /**
+   * Supplies (identifier, name, status, expected) tuples: one per identifier pattern, plus an
+   * unrecognised value and a null input.
+   */
+  public static Stream<Arguments> identifierProvider() {
+    return Stream.of(
+        Arguments.of("https://doi.org/10.3535/M42-Z4P-DSD", null, null,
+            createIdentifier("https://doi.org/10.3535/M42-Z4P-DSD", DOI, "DOI",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT, null)),
+        Arguments.of("https://www.wikidata.org/wiki/Q66581882", null, null,
+            createIdentifier("https://www.wikidata.org/wiki/Q66581882", URL, "Wikidata",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)),
+        Arguments.of("https://orcid.org/0000-0002-1825-0097", null, PREFERRED,
+            createIdentifier("https://orcid.org/0000-0002-1825-0097", URL, "ORCID",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, PREFERRED)),
+        Arguments.of("https://hdl.handle.net/XXX-XXX-XXX", null, PREFERRED,
+            createIdentifier("https://hdl.handle.net/XXX-XXX-XXX", HANDLE, "Handle",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT, PREFERRED)),
+        Arguments.of("88e320d5-c47a-4288-b265-fa3c93c57440", "dwc:catalogueNumber", null,
+            createIdentifier("88e320d5-c47a-4288-b265-fa3c93c57440", UUID, "dwc:catalogueNumber",
+                GLOBALLY_UNIQUE_STABLE, null)),
+        Arguments.of("urn:uuid:541fd754-17e8-43c8-ba4e-b413a1bf3a2f", "dwca:ID", null,
+            createIdentifier("urn:uuid:541fd754-17e8-43c8-ba4e-b413a1bf3a2f", UUID, "dwca:ID",
+                GLOBALLY_UNIQUE_STABLE, null)),
+        Arguments.of("https://geocollections.info/specimen/126758", "abcd:unitGUID", null,
+            createIdentifier("https://geocollections.info/specimen/126758", URL, "abcd:unitGUID",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)),
+        Arguments.of("http://n2t.net/ark:/65665/3173bef93-f5c6-4534-bd31-42289606938b",
+            "dwc:catalogueNumber", null,
+            createIdentifier("http://n2t.net/ark:/65665/3173bef93-f5c6-4534-bd31-42289606938b",
+                ARK, "dwc:catalogueNumber", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)),
+        Arguments.of("http://purl.org/dc/terms/accessRights", null, null,
+            createIdentifier("http://purl.org/dc/terms/accessRights", PURL, "PURL",
+                GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE, null)),
+        Arguments.of("AVES-071259", "dwc:occurrenceID", null,
+            createIdentifier("AVES-071259", LOCALLY_UNIQUE_IDENTIFIER, "dwc:occurrenceID",
+                LOCALLY_UNIQUE_STABLE, null)),
+        Arguments.of(null, null, null, null)
+    );
+  }
+
+  /** Builds the expected identifier by setting every field explicitly. */
+  private static Identifier createIdentifier(String id, DctermsType type, String title,
+      OdsGupriLevel gupriLevel, OdsIdentifierStatus status) {
+    return new Identifier()
+        .withId(id)
+        .withType("ods:Identifier")
+        .withDctermsIdentifier(id)
+        .withDctermsTitle(title)
+        .withDctermsType(type)
+        .withOdsGupriLevel(gupriLevel)
+        .withOdsIdentifierStatus(status);
+  }
+
+  @ParameterizedTest
+  @MethodSource("identifierProvider")
+  void testAddIdentifier(String identifier, String identifierName, OdsIdentifierStatus status,
+      Identifier expected) {
+    // When
+    var result = addIdentifier(identifier, identifierName, status);
+
+    // Then
+    assertThat(result).isEqualTo(expected);
+  }
+}