Skip to content

Commit

Permalink
Merge pull request #70 from DiSSCo/feature/iiif-er
Browse files Browse the repository at this point in the history
Update EntityRelationship + IIIF mediaType
  • Loading branch information
samleeflang authored Nov 19, 2024
2 parents 71d8d2c + e8ee889 commit 78273e8
Show file tree
Hide file tree
Showing 24 changed files with 265 additions and 84 deletions.
16 changes: 15 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
<commons-beanutils.version>1.9.4</commons-beanutils.version>
<jaxb2-maven-plugin.version>3.1.0</jaxb2-maven-plugin.version>
<commons-compress.version>1.27.1</commons-compress.version>
<netty-common.version>4.1.115.Final</netty-common.version>
<jakarta.activation-api.version>2.1.3</jakarta.activation-api.version>
<jakarta.xml.bind-api.version>4.0.2</jakarta.xml.bind-api.version>
<mockito-inline.version>5.2.0</mockito-inline.version>
Expand Down Expand Up @@ -87,8 +88,15 @@
<groupId>org.gbif</groupId>
<artifactId>dwca-io</artifactId>
<version>${dwca-io.version}</version>
<!-- Needed to exclude this dep from dwca-io because of CVE-2024-47554 -->
<exclusions>
<exclusion>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Needed to overwrite gbif dep with security issue in this lib -->
<!-- Needed to overwrite gbif dep with security issue in this lib -->
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
Expand All @@ -100,6 +108,12 @@
<artifactId>commons-compress</artifactId>
<version>${commons-compress.version}</version>
</dependency>
<!-- Necessary to mitigate CVE-2024-47535 in spring-boot 3.3.5 (18-11-2024) -->
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-common</artifactId>
<version>${netty-common.version}</version>
</dependency>
<dependency>
<groupId>org.codehaus.mojo</groupId>
<artifactId>jaxb2-maven-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ public void getSpecimenData(Set<String> ids, Archive archive, AtomicInteger proc
var specimenData = dwcaRepository.getCoreRecords(batch, getTableName(archive.getCore()));
log.info("Got specimen batch: {}", batch.size());
addExtensionsToSpecimen(archive, batch, specimenData);
log.info("Start translation and publishing of batch: {}", specimenData.values().size());
log.info("Start translation and publishing of batch: {}", specimenData.size());
processDigitalSpecimen(specimenData.values(), optionalEmlData, processedRecords);
}
} catch (ReachedMaximumLimitException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import static eu.dissco.core.translator.domain.AgentRoleType.COLLECTOR;
import static eu.dissco.core.translator.domain.AgentRoleType.CREATOR;
import static eu.dissco.core.translator.domain.AgentRoleType.DATA_TRANSLATOR;
import static eu.dissco.core.translator.domain.AgentRoleType.GEOREFERENCER;
import static eu.dissco.core.translator.domain.AgentRoleType.IDENTIFIER;
import static eu.dissco.core.translator.domain.AgentRoleType.RIGHTS_OWNER;
Expand All @@ -15,14 +14,13 @@
import static eu.dissco.core.translator.domain.RelationshipType.HAS_SOURCE_SYSTEM_ID;
import static eu.dissco.core.translator.domain.RelationshipType.HAS_URL;
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_PERSON;
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;
import static eu.dissco.core.translator.terms.utils.EntityRelationshipUtils.addEntityRelationship;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dissco.core.translator.component.OrganisationNameComponent;
import eu.dissco.core.translator.component.SourceSystemComponent;
import eu.dissco.core.translator.domain.RelationshipType;
import eu.dissco.core.translator.exception.OrganisationException;
import eu.dissco.core.translator.exception.UnknownPhysicalSpecimenIdType;
import eu.dissco.core.translator.properties.FdoProperties;
Expand Down Expand Up @@ -235,7 +233,6 @@
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Group;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.LithostratigraphicTerms;
import eu.dissco.core.translator.terms.specimen.stratigraphy.lithostratigraphic.Member;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import lombok.RequiredArgsConstructor;
Expand Down Expand Up @@ -362,38 +359,32 @@ private String getOrganisationName(String organisationId) throws OrganisationExc
private List<EntityRelationship> assembleDigitalSpecimenEntityRelationships(
DigitalSpecimen ds) {
var relationships = new ArrayList<EntityRelationship>();
relationships.add(getEntityRelationship(HAS_ORGANISATION_ID, ds.getOdsOrganisationID()));
relationships.add(getEntityRelationship(HAS_SOURCE_SYSTEM_ID, ds.getOdsSourceSystemID()));
relationships.add(getEntityRelationship(HAS_FDO_TYPE, fdoProperties.getDigitalSpecimenType()));
relationships.add(addEntityRelationship(HAS_ORGANISATION_ID, ds.getOdsOrganisationID(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
relationships.add(addEntityRelationship(HAS_SOURCE_SYSTEM_ID, ds.getOdsSourceSystemID(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
relationships.add(addEntityRelationship(HAS_FDO_TYPE, fdoProperties.getDigitalSpecimenType(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
if (ds.getOdsPhysicalSpecimenIDType().equals(OdsPhysicalSpecimenIDType.RESOLVABLE)) {
relationships.add(
getEntityRelationship(HAS_PHYSICAL_IDENTIFIER, ds.getOdsPhysicalSpecimenID()));
addEntityRelationship(HAS_PHYSICAL_IDENTIFIER, ds.getOdsPhysicalSpecimenID(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
}
if (ds.getDctermsLicense() != null && ds.getDctermsLicense().startsWith("http")) {
relationships.add(getEntityRelationship(HAS_LICENSE, ds.getDctermsLicense()));
relationships.add(addEntityRelationship(HAS_LICENSE, ds.getDctermsLicense(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
}
if (ds.getOdsHasCitations() != null) {
for (Citation citation : ds.getOdsHasCitations()) {
if (citation.getId() != null && citation.getId().startsWith("http")) {
relationships.add(getEntityRelationship(HAS_REFERENCE, citation.getId()));
relationships.add(addEntityRelationship(HAS_REFERENCE, citation.getId(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
}
}
}
return relationships;
}

private EntityRelationship getEntityRelationship(RelationshipType relationshipType,
String relatedResource) {
var entityRelationship = new EntityRelationship()
.withType("ods:EntityRelationship")
.withDwcRelationshipOfResource(relationshipType.getName())
.withDwcRelatedResourceID(relatedResource)
.withDwcRelationshipEstablishedDate(java.util.Date.from(Instant.now()));
entityRelationship.setOdsHasAgents(addAgent(entityRelationship.getOdsHasAgents(), fdoProperties.getApplicationName(),
fdoProperties.getApplicationPID(), DATA_TRANSLATOR, SCHEMA_SOFTWARE_APPLICATION));
return entityRelationship;
}

private List<eu.dissco.core.translator.schema.Identifier> assembleIdentifiers(JsonNode data) {
var identifiers = new ArrayList<Identifier>();
for (String identifierTerm : identifierTerms) {
Expand Down Expand Up @@ -461,7 +452,8 @@ protected Identification createIdentification(JsonNode data, boolean dwc) {
termMapper.retrieveTerm(new VerbatimIdentification(), data, dwc))
.withOdsHasTaxonIdentifications(List.of(mappedTaxonIdentification))
.withOdsHasCitations(assembleIdentificationCitations(data, dwc));
identification.setOdsHasAgents(addAgent(identification.getOdsHasAgents(), termMapper.retrieveTerm(new IdentifiedBy(), data, dwc),
identification.setOdsHasAgents(addAgent(identification.getOdsHasAgents(),
termMapper.retrieveTerm(new IdentifiedBy(), data, dwc),
termMapper.retrieveTerm(new IdentifiedByID(), data, dwc), IDENTIFIER, SCHEMA_PERSON));
return identification;
}
Expand Down Expand Up @@ -590,8 +582,9 @@ private List<Event> assembleEventTerms(JsonNode data, boolean dwc) {
.withDwcVitality(termMapper.retrieveTerm(new Vitality(), data, dwc))
.withOdsHasLocation(location)
.withOdsHasAssertions(assertions);
event.setOdsHasAgents(addAgent(event.getOdsHasAgents(), termMapper.retrieveTerm(new RecordedBy(), data, dwc),
termMapper.retrieveTerm(new RecordedByID(), data, dwc), COLLECTOR, SCHEMA_PERSON));
event.setOdsHasAgents(
addAgent(event.getOdsHasAgents(), termMapper.retrieveTerm(new RecordedBy(), data, dwc),
termMapper.retrieveTerm(new RecordedByID(), data, dwc), COLLECTOR, SCHEMA_PERSON));
return List.of(event);
}

Expand Down Expand Up @@ -714,19 +707,24 @@ public DigitalMedia assembleDigitalMedia(boolean dwc, JsonNode mediaRecord,
private List<EntityRelationship> assembleDigitalMediaEntityRelationships(
DigitalMedia digitalMedia) {
var relationships = new ArrayList<EntityRelationship>();
relationships.add(getEntityRelationship(HAS_URL, digitalMedia.getAcAccessURI()));
relationships.add(addEntityRelationship(HAS_URL, digitalMedia.getAcAccessURI(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
relationships.add(
getEntityRelationship(HAS_ORGANISATION_ID, digitalMedia.getOdsOrganisationID()));
addEntityRelationship(HAS_ORGANISATION_ID, digitalMedia.getOdsOrganisationID(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
relationships.add(
getEntityRelationship(HAS_FDO_TYPE, fdoProperties.getDigitalMediaType()));
addEntityRelationship(HAS_FDO_TYPE, fdoProperties.getDigitalMediaType(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
if (digitalMedia.getDctermsRights() != null && digitalMedia.getDctermsRights()
.startsWith("http")) {
relationships.add(
getEntityRelationship(HAS_LICENSE, digitalMedia.getDctermsRights()));
addEntityRelationship(HAS_LICENSE, digitalMedia.getDctermsRights(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
}
if (digitalMedia.getDctermsSource() != null && digitalMedia.getDctermsSource()
.startsWith("http")) {
relationships.add(getEntityRelationship(HAS_SOURCE, digitalMedia.getDctermsSource()));
relationships.add(addEntityRelationship(HAS_SOURCE, digitalMedia.getDctermsSource(),
fdoProperties.getApplicationName(), fdoProperties.getApplicationPID()));
}
return relationships;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ public class BiocaseDigitalObjectDirector extends BaseDigitalObjectDirector {
public BiocaseDigitalObjectDirector(ObjectMapper mapper, TermMapper termMapper,
OrganisationNameComponent rorComponent, SourceSystemComponent sourceSystemComponent,
FdoProperties fdoProperties) {
super(mapper, termMapper, rorComponent, sourceSystemComponent, fdoProperties, identifierTerms());
super(mapper, termMapper, rorComponent, sourceSystemComponent, fdoProperties,
identifierTerms());
}

private static List<String> identifierTerms() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ public class DwcaDigitalObjectDirector extends BaseDigitalObjectDirector {
public DwcaDigitalObjectDirector(ObjectMapper mapper, TermMapper termMapper,
OrganisationNameComponent rorComponent, SourceSystemComponent sourceSystemComponent,
FdoProperties fdoProperties) {
super(mapper, termMapper, rorComponent, sourceSystemComponent, fdoProperties, identifierTerms());
super(mapper, termMapper, rorComponent, sourceSystemComponent, fdoProperties,
identifierTerms());
}

private static List<String> identifierTerms() {
Expand All @@ -52,7 +53,7 @@ protected List<Citation> assembleSpecimenCitations(JsonNode data, boolean dwc) {
var references = data.get(EXTENSION).get("gbif:Reference");
for (int i = 0; i < references.size(); i++) {
var citationJson = references.get(i);
if (citationJson.properties().size() <= 1){
if (citationJson.properties().size() <= 1) {
log.debug("Skipping citation with only one property: {}", citationJson);
} else {
citations.add(createCitation(citationJson, dwc));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ public class MediaType extends Term {
private static final String STILL_IMAGE = "Still_Image";
private static final String SOUND = "Sound";
private static final String MOVING_IMAGE = "Moving_Image";
private static final String INTERACTIVE_RESOURCE = "Interactive_Resource";
private final List<String> dwcaTerms = List.of(TERM, "dc:type");
private final List<String> imageFormats = List.of("IMAGE/JPG", "JPG", "IMAGE/JPEG",
"JPEG", "IMAGE/PNG", "PNG", "IMAGE/TIF", "TIF");
private final List<String> interactiveResourceFormats = List.of("APPLICATION/JSON");

@Override
public String retrieveFromDWCA(JsonNode unit) {
Expand Down Expand Up @@ -51,6 +53,8 @@ public String retrieveFromABCD(JsonNode unit) {
format = format.toUpperCase();
if (imageFormats.contains(format)) {
return STILL_IMAGE;
} else if (interactiveResourceFormats.contains(format)) {
return INTERACTIVE_RESOURCE;
} else {
log.warn("Unable to determine media type of digital media object");
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class DatasetID extends Term {

public static final String TERM = DWC_PREFIX + "datasetID";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class DynamicProperties extends Term {

public static final String TERM = DWC_PREFIX + "dynamicProperties";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class OrganismID extends Term {

public static final String TERM = DWC_PREFIX + "organismID";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class OrganismName extends Term {

public static final String TERM = DWC_PREFIX + "organismName";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class OrganismRemarks extends Term {

public static final String TERM = DWC_PREFIX + "organismRemarks";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.List;

public class OrganismScope extends Term {

public static final String TERM = DWC_PREFIX + "organismScope";

private final List<String> dwcaTerms = List.of(TERM);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ public class SpecimenName extends Term {

public String calculate(DigitalSpecimen ds) {
var acceptedIdentification = retrieveAcceptedIdentification(ds);
if (acceptedIdentification != null && acceptedIdentification.getOdsHasTaxonIdentifications() != null
if (acceptedIdentification != null
&& acceptedIdentification.getOdsHasTaxonIdentifications() != null
&& !acceptedIdentification.getOdsHasTaxonIdentifications().isEmpty()) {
if (acceptedIdentification.getOdsHasTaxonIdentifications().get(0)
.getDwcScientificName() != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ public class TopicDiscipline extends Term {
public OdsTopicDiscipline calculate(DigitalSpecimen ds) {
var basisOfRecord = ds.getDwcBasisOfRecord();
var acceptedIdentification = retrieveAcceptedIdentification(ds);
if (acceptedIdentification != null && acceptedIdentification.getOdsHasTaxonIdentifications() != null
if (acceptedIdentification != null
&& acceptedIdentification.getOdsHasTaxonIdentifications() != null
&& !acceptedIdentification.getOdsHasTaxonIdentifications().isEmpty()) {
return getDiscipline(basisOfRecord,
acceptedIdentification.getOdsHasTaxonIdentifications().get(0).getDwcKingdom());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,39 @@ private static void handleMultipleAgents(
List<Agent> agents, String agentValue, String agentId, AgentRoleType role, Type type) {
var ids = new String[0];
var agentValues = new String[0];
if (agentValue != null && (agentValue.contains("&") || agentValue.contains("|"))) {
agentValues = Arrays.stream(agentValue.split("[&|]")).map(String::trim).toArray(String[]::new);
if (needsParsing(agentValue)) {
agentValues = Arrays.stream(agentValue.split("[&|]")).map(String::trim)
.toArray(String[]::new);
}
if (agentId != null && (agentId.contains("&") || agentId.contains("|"))) {
if (needsParsing(agentId)) {
ids = Arrays.stream(agentId.split("[&|]")).map(String::trim).toArray(String[]::new);
}
if (agentValues.length == ids.length) {
for (int i = 0; i < agentValues.length; i++) {
constructAgent(agents, agentValues[i], ids[i], role, type);
}
} else if (agentValues.length > ids.length) {
log.warn(
"The number of agentValues values is greater than ids, ignoring ids for term: {} and agentId: {}",
agentValue, agentId);
if (ids.length != 0) {
log.warn(
"The number of agentValues values is greater than ids, ignoring ids for term: {} and agentId: {}",
agentValue, agentId);
}
for (String agent : agentValues) {
constructAgent(agents, agent, null, role, type);
}
} else {
log.warn(
"The number of ids is greater than agentValue, ignoring agentValue values for term: {} and agentId: {}",
agentValue, agentId);
if (agentValues.length != 0) {
log.warn(
"The number of ids is greater than agentValue, ignoring agentValue values for term: {} and agentId: {}",
agentValue, agentId);
}
for (String idValue : ids) {
constructAgent(agents, null, idValue, role, type);
}
}
}

/**
 * Tells whether a raw term value holds several entries that must be split.
 *
 * @param value the raw value to inspect, may be {@code null}
 * @return {@code true} when the value is non-null and contains an ampersand or a pipe character,
 * {@code false} otherwise
 */
private static boolean needsParsing(String value) {
  if (value == null) {
    return false;
  }
  // Either separator is enough to mark the value as a multi-entry string
  return value.indexOf('&') >= 0 || value.indexOf('|') >= 0;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package eu.dissco.core.translator.terms.utils;

import static eu.dissco.core.translator.domain.AgentRoleType.DATA_TRANSLATOR;
import static eu.dissco.core.translator.schema.Agent.Type.SCHEMA_SOFTWARE_APPLICATION;
import static eu.dissco.core.translator.terms.utils.AgentsUtils.addAgent;

import eu.dissco.core.translator.domain.RelationshipType;
import eu.dissco.core.translator.schema.EntityRelationship;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Instant;
import java.util.Date;
import lombok.extern.slf4j.Slf4j;

/**
 * Static helpers for building {@link EntityRelationship} objects.
 */
@Slf4j
public class EntityRelationshipUtils {

  private EntityRelationshipUtils() {
    // This is a Utility class
  }

  /**
   * Builds an entity relationship of the given type pointing at the related resource.
   *
   * <p>The relationship gets the type {@code ods:EntityRelationship}, the name of the relationship
   * type, the related resource identifier, the current instant as established date and a single
   * agent (added through {@code AgentsUtils.addAgent}) with role {@code DATA_TRANSLATOR} and type
   * {@code SCHEMA_SOFTWARE_APPLICATION}. When the related resource starts with {@code http} and
   * can be parsed as a {@link URI}, the parsed URI is set as the related resource URI as well.
   *
   * @param relationshipType the kind of relationship, its name is stored on the result
   * @param relatedResource  identifier of the related resource, may be {@code null}
   * @param agentName        name passed to {@code addAgent} for the relationship's agent
   * @param agentId          identifier passed to {@code addAgent} for the relationship's agent
   * @return the assembled relationship, or {@code null} when {@code relatedResource} is
   * {@code null}
   */
  public static EntityRelationship addEntityRelationship(
      RelationshipType relationshipType, String relatedResource, String agentName, String agentId) {
    if (relatedResource == null) {
      log.warn("Related resource for type {} is null. Skipping entity relationship creation.",
          relationshipType.getName());
      return null;
    }
    var entityRelationship = new EntityRelationship()
        .withType("ods:EntityRelationship")
        .withDwcRelationshipOfResource(relationshipType.getName())
        .withDwcRelatedResourceID(relatedResource)
        .withDwcRelationshipEstablishedDate(Date.from(Instant.now()));
    entityRelationship.setOdsHasAgents(
        addAgent(entityRelationship.getOdsHasAgents(), agentName, agentId, DATA_TRANSLATOR,
            SCHEMA_SOFTWARE_APPLICATION));
    if (relatedResource.startsWith("http")) {
      try {
        entityRelationship.setOdsRelatedResourceURI(new URI(relatedResource));
      } catch (URISyntaxException e) {
        // Best effort: the relationship is still returned, only the URI field stays unset.
        // The exception is passed as last argument so the parse failure reason is logged.
        log.warn("Could not create URI for related resource: {}", relatedResource, e);
      }
    }
    return entityRelationship;
  }

}
Loading

0 comments on commit 78273e8

Please sign in to comment.