Skip to content

Commit

Permalink
MIR-1249 DeepGreen: wrong PDF uploaded
Browse files Browse the repository at this point in the history
- upload all files from the zip archive
- set the PDF with the same base name as the metadata XML as the main file
  • Loading branch information
fa25neh committed Oct 26, 2023
1 parent 6ff8cac commit a3564f9
Showing 1 changed file with 28 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
Expand Down Expand Up @@ -50,26 +51,37 @@ public MCRObjectID ingestMetadata(Deposit deposit) throws SwordError, SwordServe
.createTempFileFromStream("dgZip", deposit.getInputStream(), deposit.getMd5());

LOGGER.info("Zip File is : " + dgZip.toAbsolutePath().toString());
LOGGER.info("Deposit Filename is : " + deposit.getFilename());
try (SeekableByteChannel sbc = Files.newByteChannel(dgZip)) {
final ZipFile zipFile = new ZipFile(sbc);
final List<ZipArchiveEntry> entriesInPhysicalOrder = Collections
.list(zipFile.getEntriesInPhysicalOrder());

final Optional<ZipArchiveEntry> metadataEntryOpt = entriesInPhysicalOrder.stream()
.filter(e -> e.getName().endsWith(".xml")).findFirst();
final Optional<ZipArchiveEntry> pdfEntryOpt = entriesInPhysicalOrder.stream()
.filter(e -> e.getName().endsWith(".pdf")).findFirst();

if (metadataEntryOpt.isEmpty()) {
throw new SwordServerException("No Metadata File Found!");
}
final ZipArchiveEntry metadataEntry = metadataEntryOpt.get();
String filenameWithoutExt = FilenameUtils.getBaseName(metadataEntry.getName());
LOGGER.info("Metadata Filename is : " + filenameWithoutExt);

Optional<ZipArchiveEntry> pdfEntryOpt = entriesInPhysicalOrder.stream()
.filter(e -> e.getName().contains(filenameWithoutExt) && e.getName().endsWith(".pdf")).findFirst();
if (pdfEntryOpt.isEmpty()) {
throw new SwordServerException("No PDF File Found!");
LOGGER.info("No PDF File Found, with Filename " + filenameWithoutExt + "! Using first PDF File!");
pdfEntryOpt = entriesInPhysicalOrder.stream()
.filter(e -> e.getName().endsWith(".pdf")).findFirst();
if (pdfEntryOpt.isEmpty()) {
throw new SwordServerException("No PDF File Found!");
}
}

final ZipArchiveEntry metadataEntry = metadataEntryOpt.get();
final ZipArchiveEntry pdfEntry = pdfEntryOpt.get();

List<ZipArchiveEntry> otherEntryList = entriesInPhysicalOrder.stream()
.filter(e -> !e.getName().equals(metadataEntry.getName())
&& !e.getName().equals(pdfEntry.getName())).collect(Collectors.toList());

final MCRThrowFunction<ZipArchiveEntry, InputStream, IOException> getIS = zipFile::getInputStream;
try (InputStream metadataIS = getIS.apply(metadataEntry); InputStream pdfIS = getIS.apply(pdfEntry)) {
final SAXBuilder saxBuilder = new SAXBuilder();
Expand Down Expand Up @@ -102,6 +114,16 @@ public MCRObjectID ingestMetadata(Deposit deposit) throws SwordError, SwordServe
);
derivate.getDerivate().getClassifications().add(derivateTypeClassification);
}
if (!otherEntryList.isEmpty()) {
otherEntryList.forEach(e -> {
try (InputStream otherDataIS = getIS.apply(e)) {
Files.copy(otherDataIS, MCRPath.getPath(derivate.getId().toString(),
FilenameUtils.getName(e.getName())));
} catch (IOException ex) {
LOGGER.error("Error while processing File " + e.getName());
}
});
}
MCRMetadataManager.update(derivate);
return newObjectId;
} catch (SAXException | JDOMException | MCRAccessException e) {
Expand Down

0 comments on commit a3564f9

Please sign in to comment.