Skip to content

Commit

Permalink
Merge pull request #71 from DiSSCo/feature/add-identifier-meta
Browse files Browse the repository at this point in the history
Feature/add identifier meta
  • Loading branch information
samleeflang authored Nov 20, 2024
2 parents e8ee889 + a4722f4 commit ea7b558
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_PERSON;
import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;
import static eu.dissco.core.translator.terms.utils.EntityRelationshipUtils.addEntityRelationship;
import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
Expand Down Expand Up @@ -389,11 +390,7 @@ private List<eu.dissco.core.translator.schema.Identifier> assembleIdentifiers(Js
var identifiers = new ArrayList<Identifier>();
for (String identifierTerm : identifierTerms) {
if (data.get(identifierTerm) != null) {
var identifier = new Identifier()
.withId(data.get(identifierTerm).asText())
.withType("ods:Identifier")
.withDctermsTitle(identifierTerm)
.withDctermsIdentifier(data.get(identifierTerm).asText());
var identifier = addIdentifier(data.get(identifierTerm).asText(), identifierTerm);
identifiers.add(identifier);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package eu.dissco.core.translator.terms.utils;

import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier;

import eu.dissco.core.translator.domain.AgentRoleType;
import eu.dissco.core.translator.schema.Agent;
import eu.dissco.core.translator.schema.Agent.Type;
import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus;
import eu.dissco.core.translator.schema.OdsHasRole;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
Expand All @@ -17,23 +21,28 @@ private AgentsUtils() {

public static List<Agent> addAgent(List<Agent> currentAgents, String agentValue, String agentId,
AgentRoleType role, Type type) {
return addAgent(currentAgents, agentValue, agentId, role, type, null);
}

public static List<Agent> addAgent(List<Agent> currentAgents, String agentValue, String agentId,
AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) {
var agents = new ArrayList<Agent>();
if (currentAgents != null) {
agents.addAll(currentAgents);
}
if (agentValue != null || agentId != null) {
if ((agentValue != null && (agentValue.contains("&") || agentValue.contains("|"))) || (
agentId != null && (agentId.contains("&") || agentId.contains("|")))) {
handleMultipleAgents(agents, agentValue, agentId, role, type);
handleMultipleAgents(agents, agentValue, agentId, role, type, identifierStatus);
} else {
constructAgent(agents, agentValue, agentId, role, type);
constructAgent(agents, agentValue, agentId, role, type, identifierStatus);
}
}
return agents;
}

private static void constructAgent(List<Agent> agents, String agentValue, String agentId,
AgentRoleType role, Type type) {
AgentRoleType role, Type type, OdsIdentifierStatus identifierStatus) {
String agentName = agentValue;
if (agentValue != null && agentValue.contains("http") && agentId == null) {
agentId = agentValue;
Expand All @@ -50,19 +59,17 @@ private static void constructAgent(List<Agent> agents, String agentValue, String
.withSchemaName(agentName)
.withSchemaIdentifier(agentId)
.withOdsHasRoles(
List.of(new eu.dissco.core.translator.schema.OdsHasRole().withType("schema:Role")
List.of(new OdsHasRole().withType("schema:Role")
.withSchemaRoleName(role.getName())));
if (agentId != null) {
agent.withOdsHasIdentifiers(List.of(
new eu.dissco.core.translator.schema.Identifier().withId(agentId)
.withType("ods:Identifier")
.withDctermsIdentifier(agentId)));
agent.withOdsHasIdentifiers(List.of(addIdentifier(agentId, null, identifierStatus)));
}
agents.add(agent);
}

private static void handleMultipleAgents(
List<Agent> agents, String agentValue, String agentId, AgentRoleType role, Type type) {
List<Agent> agents, String agentValue, String agentId, AgentRoleType role, Type type,
OdsIdentifierStatus identifierStatus) {
var ids = new String[0];
var agentValues = new String[0];
if (needsParsing(agentValue)) {
Expand All @@ -74,7 +81,7 @@ private static void handleMultipleAgents(
}
if (agentValues.length == ids.length) {
for (int i = 0; i < agentValues.length; i++) {
constructAgent(agents, agentValues[i], ids[i], role, type);
constructAgent(agents, agentValues[i], ids[i], role, type, identifierStatus);
}
} else if (agentValues.length > ids.length) {
if (ids.length != 0) {
Expand All @@ -83,7 +90,7 @@ private static void handleMultipleAgents(
agentValue, agentId);
}
for (String agent : agentValues) {
constructAgent(agents, agent, null, role, type);
constructAgent(agents, agent, null, role, type, identifierStatus);
}
} else {
if (agentValues.length != 0) {
Expand All @@ -92,7 +99,7 @@ private static void handleMultipleAgents(
agentValue, agentId);
}
for (String idValue : ids) {
constructAgent(agents, null, idValue, role, type);
constructAgent(agents, null, idValue, role, type, identifierStatus);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static eu.dissco.core.translator.domain.AgentRoleType.DATA_TRANSLATOR;
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;

import eu.dissco.core.translator.domain.RelationshipType;
Expand Down Expand Up @@ -33,7 +34,7 @@ public static EntityRelationship addEntityRelationship(
.withDwcRelationshipEstablishedDate(Date.from(Instant.now()));
entityRelationship.setOdsHasAgents(
addAgent(entityRelationship.getOdsHasAgents(), agentName, agentId, DATA_TRANSLATOR,
SCHEMA_SOFTWARE_APPLICATION));
SCHEMA_SOFTWARE_APPLICATION, PREFERRED));
if (relatedResource.startsWith("http")) {
try {
entityRelationship.setOdsRelatedResourceURI(new URI(relatedResource));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package eu.dissco.core.translator.terms.utils;

import static eu.dissco.core.translator.schema.Identifier.DctermsType.*;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT;
import static java.util.regex.Pattern.compile;

import eu.dissco.core.translator.schema.Identifier;
import eu.dissco.core.translator.schema.Identifier.DctermsType;
import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel;
import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Triple;

/**
 * Utility for building {@link Identifier} objects from a raw identifier string.
 *
 * <p>The raw string is compared against an ordered table of regular expressions. The first
 * pattern that is found in the string decides the {@code dcterms:type}, the default
 * {@code dcterms:title} and the GUPRI level set on the resulting identifier. Strings that match
 * no pattern are treated as locally unique identifiers.
 */
@Slf4j
public class IdentifierUtils {

  /**
   * Ordered lookup table: pattern(s) mapped to (type, default title, GUPRI level). Iteration order
   * is significant because the first match wins, so host-specific patterns must precede the
   * generic URL pattern.
   */
  private static final Map<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>> PATTERN_MAP = patternMap();

  private IdentifierUtils() {
    // This is a Utility class
  }

  /**
   * Builds the ordered pattern table. A {@link LinkedHashMap} is used to keep insertion order.
   *
   * @return the pattern table, from most specific to most generic pattern
   */
  private static Map<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>> patternMap() {
    var linkedMap = new LinkedHashMap<List<Pattern>, Triple<DctermsType, String, OdsGupriLevel>>();
    // Dots in host names are escaped so they only match a literal '.'
    linkedMap.put(List.of(compile("^https?://doi\\.org")),
        Triple.of(DOI, "DOI", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT));
    linkedMap.put(List.of(compile("^https?://hdl\\.handle\\.net")),
        Triple.of(HANDLE, "Handle", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT));
    linkedMap.put(List.of(compile("^https?://www\\.wikidata\\.org")),
        Triple.of(URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
    linkedMap.put(List.of(compile("^https?://orcid\\.org")),
        Triple.of(URL, "ORCID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
    // Any host is accepted (multiple labels, hyphens) as long as the path starts with "ark:/"
    linkedMap.put(List.of(compile("^https?://[^/\\s]+/ark:/\\w+/\\w+")),
        Triple.of(ARK, "ARK", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
    // Anchored like the other URL patterns so an embedded PURL address does not match
    linkedMap.put(List.of(compile("^https?://purl\\.org")),
        Triple.of(PURL, "PURL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
    // Requires the "://" separator so that local identifiers merely starting with "http" are
    // not classified as URLs
    linkedMap.put(List.of(compile("^https?://")),
        Triple.of(URL, "URL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE));
    // Deliberately unanchored: prefixed forms such as "urn:uuid:<uuid>" are recognised as UUID
    linkedMap.put(List.of(compile(
            "(uuid:)*[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}")),
        Triple.of(UUID, "UUID", GLOBALLY_UNIQUE_STABLE));
    return linkedMap;
  }

  /**
   * Creates an identifier without an explicit name or status.
   *
   * @param identifierString the raw identifier value, may be {@code null}
   * @return the identifier, or {@code null} when {@code identifierString} is {@code null}
   */
  public static Identifier addIdentifier(String identifierString) {
    return addIdentifier(identifierString, null, null);
  }

  /**
   * Creates an identifier with an explicit name and without a status.
   *
   * @param identifierString the raw identifier value, may be {@code null}
   * @param identifierName   the title to use; when {@code null} the default title of the matched
   *                         pattern is used
   * @return the identifier, or {@code null} when {@code identifierString} is {@code null}
   */
  public static Identifier addIdentifier(String identifierString, String identifierName) {
    return addIdentifier(identifierString, identifierName, null);
  }

  /**
   * Creates an identifier and derives its type, title and GUPRI level from the raw value.
   *
   * @param identifierString the raw identifier value, may be {@code null}
   * @param identifierName   the title to use; when {@code null} the default title of the matched
   *                         pattern is used, and no title is set for unrecognised identifiers
   * @param identifierStatus the status to set on the identifier, may be {@code null}
   * @return the identifier, or {@code null} when {@code identifierString} is {@code null}
   */
  public static Identifier addIdentifier(String identifierString, String identifierName,
      OdsIdentifierStatus identifierStatus) {
    if (identifierString == null) {
      return null;
    }
    var identifier = new Identifier()
        .withId(identifierString)
        .withType("ods:Identifier")
        .withDctermsIdentifier(identifierString)
        .withOdsIdentifierStatus(identifierStatus);
    for (var entry : PATTERN_MAP.entrySet()) {
      var properties = entry.getValue();
      for (var pattern : entry.getKey()) {
        // find() instead of matches(): the patterns describe a prefix or a fragment of the value
        if (pattern.matcher(identifierString).find()) {
          identifier.setDctermsType(properties.getLeft());
          identifier.setDctermsTitle(getDcTermsTitle(identifierName, properties.getMiddle()));
          identifier.setOdsGupriLevel(properties.getRight());
          return identifier;
        }
      }
    }
    log.debug(
        "Unable to recognise the type of identifier: {}. Assuming locally unique identifier",
        identifierString);
    identifier.setDctermsType(DctermsType.LOCALLY_UNIQUE_IDENTIFIER);
    identifier.setDctermsTitle(identifierName);
    identifier.setOdsGupriLevel(OdsGupriLevel.LOCALLY_UNIQUE_STABLE);
    return identifier;
  }

  /**
   * Picks the title for a recognised identifier.
   *
   * @param identifierName the caller supplied title, may be {@code null}
   * @param defaultValue   the default title of the matched pattern
   * @return {@code identifierName} when it is not {@code null}, otherwise {@code defaultValue}
   */
  private static String getDcTermsTitle(String identifierName, String defaultValue) {
    return identifierName != null ? identifierName : defaultValue;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ private static Agent createAgent(String name, String id) {
.withSchemaRoleName(CREATOR.getName())));
if (id != null) {
agent.withOdsHasIdentifiers(
List.of(new eu.dissco.core.translator.schema.Identifier().withType("ods:Identifier")
.withId(id).withDctermsIdentifier(id)));
List.of(IdentifierUtils.addIdentifier(id)));
}
return agent;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import static eu.dissco.core.translator.domain.RelationshipType.HAS_FDO_TYPE;
import static eu.dissco.core.translator.domain.RelationshipType.HAS_ORGANISATION_ID;
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;
import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
import static org.assertj.core.api.Assertions.assertThat;

import eu.dissco.core.translator.domain.RelationshipType;
Expand Down Expand Up @@ -74,8 +74,8 @@ private EntityRelationship createEntityRelationship(RelationshipType relationshi
.withDwcRelatedResourceID(relatedResource)
.withOdsRelatedResourceURI(relatedResourceURI)
.withDwcRelationshipEstablishedDate(Date.from(Instant.now()))
.withOdsHasAgents(addAgent(List.of(), APP_NAME, APP_PID,
DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION));
.withOdsHasAgents(AgentsUtils.addAgent(List.of(), APP_NAME, APP_PID,
DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION, PREFERRED));

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package eu.dissco.core.translator.terms.utils;

import static eu.dissco.core.translator.schema.Identifier.DctermsType.ARK;
import static eu.dissco.core.translator.schema.Identifier.DctermsType.HANDLE;
import static eu.dissco.core.translator.schema.Identifier.DctermsType.LOCALLY_UNIQUE_IDENTIFIER;
import static eu.dissco.core.translator.schema.Identifier.DctermsType.PURL;
import static eu.dissco.core.translator.schema.Identifier.DctermsType.URL;
import static eu.dissco.core.translator.schema.Identifier.DctermsType.UUID;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT;
import static eu.dissco.core.translator.schema.Identifier.OdsGupriLevel.LOCALLY_UNIQUE_STABLE;
import static eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus.PREFERRED;
import static eu.dissco.core.translator.terms.utils.IdentifierUtils.addIdentifier;
import static org.assertj.core.api.Assertions.assertThat;

import eu.dissco.core.translator.schema.Identifier;
import eu.dissco.core.translator.schema.Identifier.DctermsType;
import eu.dissco.core.translator.schema.Identifier.OdsGupriLevel;
import eu.dissco.core.translator.schema.Identifier.OdsIdentifierStatus;
import java.util.stream.Stream;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
class IdentifierUtilsTest {

  /**
   * Supplies the rows for {@link #testAddIdentifier}: raw identifier, identifier name, status and
   * the identifier that is expected to be produced for them.
   */
  public static Stream<Arguments> identifierProvider() {
    return Stream.of(
        row("https://www.wikidata.org/wiki/Q66581882", null, null,
            URL, "Wikidata", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE),
        row("https://hdl.handle.net/XXX-XXX-XXX", null, PREFERRED,
            HANDLE, "Handle", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE_FDO_COMPLIANT),
        row("88e320d5-c47a-4288-b265-fa3c93c57440", "dwc:catalogueNumber", null,
            UUID, "dwc:catalogueNumber", GLOBALLY_UNIQUE_STABLE),
        row("urn:uuid:541fd754-17e8-43c8-ba4e-b413a1bf3a2f", "dwca:ID", null,
            UUID, "dwca:ID", GLOBALLY_UNIQUE_STABLE),
        row("https://geocollections.info/specimen/126758", "abcd:unitGUID", null,
            URL, "abcd:unitGUID", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE),
        row("http://n2t.net/ark:/65665/3173bef93-f5c6-4534-bd31-42289606938b",
            "dwc:catalogueNumber", null,
            ARK, "dwc:catalogueNumber", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE),
        row("http://purl.org/dc/terms/accessRights", null, null,
            PURL, "PURL", GLOBALLY_UNIQUE_STABLE_PERSISTENT_RESOLVABLE),
        row("AVES-071259", "dwc:occurrenceID", null,
            LOCALLY_UNIQUE_IDENTIFIER, "dwc:occurrenceID", LOCALLY_UNIQUE_STABLE),
        // A null identifier is expected to yield a null result
        Arguments.of(null, null, null, null)
    );
  }

  /**
   * Builds one provider row: the three inputs followed by the expected identifier, which carries
   * the raw value as id and dcterms:identifier and the given status.
   */
  private static Arguments row(String rawValue, String name, OdsIdentifierStatus status,
      DctermsType expectedType, String expectedTitle, OdsGupriLevel expectedLevel) {
    var expected = new Identifier()
        .withType("ods:Identifier")
        .withId(rawValue)
        .withDctermsIdentifier(rawValue)
        .withDctermsType(expectedType)
        .withDctermsTitle(expectedTitle)
        .withOdsGupriLevel(expectedLevel)
        .withOdsIdentifierStatus(status);
    return Arguments.of(rawValue, name, status, expected);
  }

  @ParameterizedTest
  @MethodSource("identifierProvider")
  void testAddIdentifier(String rawValue, String name, OdsIdentifierStatus status,
      Identifier expected) {
    // When
    var actual = addIdentifier(rawValue, name, status);

    // Then
    assertThat(actual).isEqualTo(expected);
  }
}

0 comments on commit ea7b558

Please sign in to comment.