From bf4f503724006a6615d5dcac3e336f12f4f9b524 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 3 Jan 2024 08:51:37 +0100 Subject: [PATCH] ufal/be-s3-checksum-fix (#487) * Cherry picked fix S3 optimalization fixes from Vanilla * Cherry picked adding of pagination on cleanup * Updated Sync services according to S3 optimization fix * The checksum value is compared between S3 and local assetstore and the new result is inserted to the database. * Added docs and refactoring * The checksum values are exposed by REST API * Fixed failing tests --------- Co-authored-by: Tim Donohue Co-authored-by: Luca Giamminonni --- dspace-api/pom.xml | 24 + .../org/dspace/checker/CheckerCommand.java | 49 +- .../checker/ChecksumHistoryServiceImpl.java | 3 +- .../dspace/checker/ChecksumResultCode.java | 3 +- .../checker/SimpleReporterServiceImpl.java | 2 + .../dao/impl/MostRecentChecksumDAOImpl.java | 4 +- .../dspace/content/BitstreamServiceImpl.java | 4 +- .../content/ClarinBitstreamServiceImpl.java | 19 +- .../org/dspace/content/dao/BitstreamDAO.java | 2 +- .../content/dao/impl/BitstreamDAOImpl.java | 5 +- .../content/service/BitstreamService.java | 2 +- .../src/main/java/org/dspace/core/Email.java | 6 +- .../storage/bitstore/BaseBitStoreService.java | 32 +- .../storage/bitstore/BitStoreService.java | 5 +- .../bitstore/BitstreamStorageServiceImpl.java | 159 +++--- .../storage/bitstore/DSBitStoreService.java | 13 +- .../DeleteOnCloseFileInputStream.java | 42 ++ .../storage/bitstore/S3BitStoreService.java | 155 +++--- .../SyncBitstreamStorageServiceImpl.java | 207 +++++--- .../factory/StorageServiceFactory.java | 3 + .../factory/StorageServiceFactoryImpl.java | 9 + .../service/BitstreamStorageService.java | 2 +- ...07.28__Upgrade_to_Lindat_Clarin_schema.sql | 9 +- .../storage/bitstore/S3BitStoreServiceIT.java | 434 ++++++++++++++++ .../bitstore/S3BitStoreServiceTest.java | 481 ------------------ .../converter/BitstreamChecksumConverter.java | 35 ++ .../app/rest/model/BitstreamChecksum.java | 46 ++ .../app/rest/model/BitstreamChecksumRest.java | 66 +++ .../dspace/app/rest/model/BitstreamRest.java | 5 + .../hateoas/BitstreamChecksumResource.java | 26 + .../BitstreamCheckSumLinkRepository.java | 104 ++++ .../app/rest/BitstreamRestControllerIT.java | 47 ++ .../app/rest/matcher/BitstreamMatcher.java | 6 +- 33 files changed, 1245 insertions(+), 764 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java create mode 100644 dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java delete mode 100644 dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index c21c348c29b7..9d75342bd231 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -844,6 +844,24 @@ + + + io.findify + s3mock_2.13 + 0.2.6 + test + + + com.amazonawsl + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-s3 + + + + @@ -907,6 +925,12 @@ swagger-core 1.6.2 + + org.scala-lang + scala-library + 2.13.2 + test + diff --git a/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java b/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java index 6b16d51bfe1e..ba503d83eb4f 100644 --- a/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java +++ b/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java @@ -7,10 +7,13 @@ */ package org.dspace.checker; +import static org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl.SYNCHRONIZED_STORES_NUMBER; + import java.io.IOException; import java.sql.SQLException; import java.util.Date; import java.util.Map; +import java.util.Objects; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.Logger; @@ -20,8 +23,8 @@ import org.dspace.checker.service.MostRecentChecksumService; import org.dspace.content.Bitstream; import org.dspace.core.Context; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.factory.StorageServiceFactory; -import org.dspace.storage.bitstore.service.BitstreamStorageService; /** *

@@ -55,7 +58,7 @@ public final class CheckerCommand { * Checksum history Data access object */ private ChecksumHistoryService checksumHistoryService = null; - private BitstreamStorageService bitstreamStorageService = null; + private SyncBitstreamStorageServiceImpl bitstreamStorageService = null; private ChecksumResultService checksumResultService = null; /** @@ -86,7 +89,7 @@ public final class CheckerCommand { public CheckerCommand(Context context) { checksumService = CheckerServiceFactory.getInstance().getMostRecentChecksumService(); checksumHistoryService = CheckerServiceFactory.getInstance().getChecksumHistoryService(); - bitstreamStorageService = StorageServiceFactory.getInstance().getBitstreamStorageService(); + bitstreamStorageService = StorageServiceFactory.getInstance().getSyncBitstreamStorageService(); checksumResultService = CheckerServiceFactory.getInstance().getChecksumResultService(); this.context = context; } @@ -245,7 +248,9 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException { info.setProcessStartDate(new Date()); try { - Map checksumMap = bitstreamStorageService.computeChecksum(context, info.getBitstream()); + // 1. DB - Store not match + Bitstream bitstream = info.getBitstream(); + Map checksumMap = bitstreamStorageService.computeChecksum(context, bitstream); if (MapUtils.isNotEmpty(checksumMap)) { info.setBitstreamFound(true); if (checksumMap.containsKey("checksum")) { @@ -255,10 +260,42 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException { if (checksumMap.containsKey("checksum_algorithm")) { info.setChecksumAlgorithm(checksumMap.get("checksum_algorithm").toString()); } + + // compare new checksum to previous checksum + info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum())); + + } else { + info.setCurrentChecksum(""); + info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND)); + info.setToBeProcessed(false); + } + + // 2. Store1 - Synchronized store 2 not match + // Check checksum of synchronized store + if (bitstream.getStoreNumber() != SYNCHRONIZED_STORES_NUMBER) { + return; + } + if (Objects.equals(ChecksumResultCode.CHECKSUM_NO_MATCH, info.getChecksumResult().getResultCode())) { + return; + } + + Map syncStoreChecksumMap = + bitstreamStorageService.computeChecksumSpecStore(context, bitstream, + bitstreamStorageService.getSynchronizedStoreNumber(bitstream)); + if (MapUtils.isNotEmpty(syncStoreChecksumMap)) { + String syncStoreChecksum = ""; + if (checksumMap.containsKey("checksum")) { + syncStoreChecksum = syncStoreChecksumMap.get("checksum").toString(); + } + // compare new checksum to previous checksum + ChecksumResult checksumResult = compareChecksums(info.getCurrentChecksum(), syncStoreChecksum); + // Do not override result with synchronization info if the checksums are not matching between + // DB and store + if (!Objects.equals(checksumResult.getResultCode(), ChecksumResultCode.CHECKSUM_NO_MATCH)) { + info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.CHECKSUM_SYNC_NO_MATCH)); + } } - // compare new checksum to previous checksum - info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum())); } catch (IOException e) { // bitstream located, but file missing from asset store info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND)); diff --git a/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java b/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java index f8d6560e9246..f7b05d4de9d3 100644 --- a/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java @@ -74,7 +74,8 @@ public void addHistory(Context context, MostRecentChecksum mostRecentChecksum) t if (mostRecentChecksum.getBitstream().isDeleted()) { checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.BITSTREAM_MARKED_DELETED); } else { - checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.CHECKSUM_MATCH); + checksumResult = checksumResultService.findByCode(context, + mostRecentChecksum.getChecksumResult().getResultCode()); } checksumHistory.setResult(checksumResult); diff --git a/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java b/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java index a0b532144290..a24127bb5371 100644 --- a/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java +++ b/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java @@ -24,5 +24,6 @@ public enum ChecksumResultCode { CHECKSUM_MATCH, CHECKSUM_NO_MATCH, CHECKSUM_PREV_NOT_FOUND, - CHECKSUM_ALGORITHM_INVALID + CHECKSUM_ALGORITHM_INVALID, + CHECKSUM_SYNC_NO_MATCH } diff --git a/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java b/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java index 26c102e1e78b..ddefb28e1b57 100644 --- a/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java @@ -152,6 +152,7 @@ public int getBitstreamNotFoundReport(Context context, Date startDate, Date endD osw.write("\n"); osw.write(msg("bitstream-not-found-report")); + osw.write(" "); osw.write(applyDateFormatShort(startDate)); osw.write(" "); osw.write(msg("date-range-to")); @@ -230,6 +231,7 @@ public int getUncheckedBitstreamsReport(Context context, OutputStreamWriter osw) osw.write("\n"); osw.write(msg("unchecked-bitstream-report")); + osw.write(" "); osw.write(applyDateFormatShort(new Date())); osw.write("\n\n\n"); diff --git a/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java b/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java index 66ce666b9d6d..a31e02cbab4a 100644 --- a/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java @@ -92,8 +92,8 @@ public List findByResultTypeInDateRange(Context context, Dat criteriaQuery.where(criteriaBuilder.and( criteriaBuilder.equal(mostRecentResult.get(ChecksumResult_.resultCode), resultCode), criteriaBuilder.lessThanOrEqualTo( - mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate), - criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate) + mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate), + criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate) ) ); List orderList = new LinkedList<>(); diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index 4bde1cabb936..73b09015cd10 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -338,8 +338,8 @@ public void updateLastModified(Context context, Bitstream bitstream) { } @Override - public List findDeletedBitstreams(Context context) throws SQLException { - return bitstreamDAO.findDeletedBitstreams(context); + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException { + return bitstreamDAO.findDeletedBitstreams(context, limit, offset); } @Override diff --git a/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java index 3c63ada2a763..1d4af3f7abdb 100644 --- a/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java @@ -9,7 +9,7 @@ import java.io.IOException; import java.sql.SQLException; -import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -25,7 +25,7 @@ import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.event.Event; -import org.dspace.storage.bitstore.DSBitStoreService; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.annotation.Autowired; @@ -48,7 +48,7 @@ public class ClarinBitstreamServiceImpl implements ClarinBitstreamService { private static final String CSA = "MD5"; @Autowired - private DSBitStoreService storeService; + private SyncBitstreamStorageServiceImpl syncBitstreamStorageService; @Autowired protected BitstreamDAO bitstreamDAO; @Autowired @@ -99,13 +99,12 @@ public boolean validation(Context context, Bitstream bitstream) } //get file from assetstore based on internal_id //recalculate check fields - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - Map checksumMap = storeService.about(bitstream, wantedMetadata); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = syncBitstreamStorageService + .getStore(syncBitstreamStorageService.whichStoreNumber(bitstream)) + .about(bitstream, wantedMetadata); //check that new calculated values match the expected values - if (MapUtils.isEmpty(checksumMap) || !valid(bitstream, checksumMap)) { + if (MapUtils.isEmpty(receivedMetadata) || !valid(bitstream, receivedMetadata)) { //an error occurred - expected and calculated values do not match //delete all created data bitstreamService.delete(context, bitstream); @@ -126,7 +125,7 @@ public boolean validation(Context context, Bitstream bitstream) * @param checksumMap calculated values * @return bitstream values match with expected values */ - private boolean valid(Bitstream bitstream, Map checksumMap) { + private boolean valid(Bitstream bitstream, Map checksumMap) { if (!checksumMap.containsKey("checksum") || !checksumMap.containsKey("checksum_algorithm") || !checksumMap.containsKey("size_bytes")) { log.error("Cannot validate of bitstream with id: " + bitstream.getID() + diff --git a/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java b/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java index c1ef92313127..0d7afaa3cd73 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java @@ -29,7 +29,7 @@ public interface BitstreamDAO extends DSpaceObjectLegacySupportDAO { public Iterator findAll(Context context, int limit, int offset) throws SQLException; - public List findDeletedBitstreams(Context context) throws SQLException; + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException; public List findDuplicateInternalIdentifier(Context context, Bitstream bitstream) throws SQLException; diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java index 02e3509c311a..d6d77fe7f0c7 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java @@ -41,13 +41,14 @@ protected BitstreamDAOImpl() { } @Override - public List findDeletedBitstreams(Context context) throws SQLException { + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException { CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context); CriteriaQuery criteriaQuery = getCriteriaQuery(criteriaBuilder, Bitstream.class); Root bitstreamRoot = criteriaQuery.from(Bitstream.class); criteriaQuery.select(bitstreamRoot); + criteriaQuery.orderBy(criteriaBuilder.desc(bitstreamRoot.get(Bitstream_.ID))); criteriaQuery.where(criteriaBuilder.equal(bitstreamRoot.get(Bitstream_.deleted), true)); - return list(context, criteriaQuery, false, Bitstream.class, -1, -1); + return list(context, criteriaQuery, false, Bitstream.class, limit, offset); } diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 4621c95e7c89..8effabf28435 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -183,7 +183,7 @@ public InputStream retrieve(Context context, Bitstream bitstream) * @return a list of all bitstreams that have been "deleted" * @throws SQLException if database error */ - public List findDeletedBitstreams(Context context) throws SQLException; + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException; /** diff --git a/dspace-api/src/main/java/org/dspace/core/Email.java b/dspace-api/src/main/java/org/dspace/core/Email.java index 64da629bcc53..998d934c9558 100644 --- a/dspace-api/src/main/java/org/dspace/core/Email.java +++ b/dspace-api/src/main/java/org/dspace/core/Email.java @@ -314,6 +314,8 @@ public void send() throws MessagingException, IOException { message.addRecipient(Message.RecipientType.TO, new InternetAddress( i.next())); } + // Get headers defined by the template. + String[] templateHeaders = config.getArrayProperty("mail.message.headers"); // Format the mail message body VelocityEngine templateEngine = new VelocityEngine(); @@ -334,6 +336,7 @@ public void send() throws MessagingException, IOException { repo.putStringResource(contentName, content); // Turn content into a template. template = templateEngine.getTemplate(contentName); + templateHeaders = new String[] {}; } StringWriter writer = new StringWriter(); @@ -351,8 +354,7 @@ public void send() throws MessagingException, IOException { message.setSentDate(date); message.setFrom(new InternetAddress(from)); - // Get headers defined by the template. - for (String headerName : config.getArrayProperty("mail.message.headers")) { + for (String headerName : templateHeaders) { String headerValue = (String) vctx.get(headerName); if ("subject".equalsIgnoreCase(headerName)) { if (null != headerValue) { diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java index 209c1e21e74d..5b367d7a8136 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java @@ -14,6 +14,8 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; @@ -153,22 +155,24 @@ protected boolean isLonger(String internalId, int endIndex) { * Retrieves a map of useful metadata about the File (size, checksum, modified) * * @param file The File to analyze - * @param attrs The map where we are storing values + * @param attrs The list of requested metadata values * @return Map of updated metadatas / attrs * @throws IOException */ - public Map about(File file, Map attrs) throws IOException { + public Map about(File file, List attrs) throws IOException { + + Map metadata = new HashMap(); + try { if (file != null && file.exists()) { - this.putValueIfExistsKey(attrs, SIZE_BYTES, file.length()); - if (attrs.containsKey(CHECKSUM)) { - attrs.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file))); - attrs.put(CHECKSUM_ALGORITHM, CSA); + this.putValueIfExistsKey(attrs, metadata, SIZE_BYTES, file.length()); + if (attrs.contains(CHECKSUM)) { + metadata.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file))); + metadata.put(CHECKSUM_ALGORITHM, CSA); } - this.putValueIfExistsKey(attrs, MODIFIED, String.valueOf(file.lastModified())); - return attrs; + this.putValueIfExistsKey(attrs, metadata, MODIFIED, String.valueOf(file.lastModified())); } - return null; + return metadata; } catch (Exception e) { log.error("about( FilePath: " + file.getAbsolutePath() + ", Map: " + attrs.toString() + ")", e); throw new IOException(e); @@ -204,13 +208,9 @@ private byte[] generateChecksumFrom(FileInputStream fis) throws IOException, NoS } } - protected void putValueIfExistsKey(Map attrs, String key, Object value) { - this.putEntryIfExistsKey(attrs, key, Map.entry(key, value)); - } - - protected void putEntryIfExistsKey(Map attrs, String key, Map.Entry entry) { - if (attrs.containsKey(key)) { - attrs.put(entry.getKey(), entry.getValue()); + protected void putValueIfExistsKey(List attrs, Map metadata, String key, Object value) { + if (attrs.contains(key)) { + metadata.put(key, value); } } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index b6ac540c5047..5a02ad1d5617 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.List; import java.util.Map; import org.dspace.content.Bitstream; @@ -62,13 +63,13 @@ public interface BitStoreService { * Obtain technical metadata about an asset in the asset store. * * @param bitstream The bitstream to describe - * @param attrs A Map whose keys consist of desired metadata fields + * @param attrs A List of desired metadata fields * @return attrs * A Map with key/value pairs of desired metadata * If file not found, then return null * @throws java.io.IOException If a problem occurs while obtaining metadata */ - public Map about(Bitstream bitstream, Map attrs) throws IOException; + public Map about(Bitstream bitstream, List attrs) throws IOException; /** * Remove an asset from the asset store. diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index b8a1a2e96ad4..3539496b1466 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -17,6 +17,7 @@ import java.util.UUID; import javax.annotation.Nullable; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -165,12 +166,9 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, bitstream.setStoreNumber(assetstore); bitstreamService.update(context, bitstream); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); - Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); if (MapUtils.isEmpty(receivedMetadata)) { String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; log.error(message); @@ -200,13 +198,8 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, } @Override - public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { - Map wantedMetadata = new HashMap(); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); - return receivedMetadata; + public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { + return this.getStore(bitstream.getStoreNumber()).about(bitstream, List.of("checksum", "checksum_algorithm")); } @Override @@ -224,25 +217,61 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); - int commitCounter = 0; + + int offset = 0; + int limit = 100; + + int cleanedBitstreamCount = 0; + + int deletedBitstreamCount = bitstreamService.countDeletedBitstreams(context); + System.out.println("Found " + deletedBitstreamCount + " deleted bistream to cleanup"); try { context.turnOffAuthorisationSystem(); - List storage = bitstreamService.findDeletedBitstreams(context); - for (Bitstream bitstream : storage) { - UUID bid = bitstream.getID(); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("modified", null); - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); + while (cleanedBitstreamCount < deletedBitstreamCount) { + + List storage = bitstreamService.findDeletedBitstreams(context, limit, offset); + + if (CollectionUtils.isEmpty(storage)) { + break; + } + + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + List wantedMetadata = List.of("size_bytes", "modified"); + Map receivedMetadata = this.getStore(bitstream.getStoreNumber()) + .about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + // This is a small chance that this is a file which is + // being stored -- get it next time. + if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } - // Make sure entries which do not exist are removed - if (MapUtils.isEmpty(receivedMetadata)) { - log.debug("bitstore.about is empty, so file is not present"); if (deleteDbRecords) { - log.debug("deleting record"); + log.debug("deleting db record"); if (verbose) { System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); } @@ -252,64 +281,42 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio } bitstreamService.expunge(context, bitstream); } - context.uncacheEntity(bitstream); - continue; - } - // This is a small chance that this is a file which is - // being stored -- get it next time. - if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { - log.debug("file is recent"); - context.uncacheEntity(bitstream); - continue; - } - - if (deleteDbRecords) { - log.debug("deleting db record"); - if (verbose) { - System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams } - checksumHistoryService.deleteByBitstream(context, bitstream); - if (verbose) { - System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + + + // Since versioning allows for multiple bitstreams, check if the internal + // identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(bitstream.getStoreNumber()).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } } - bitstreamService.expunge(context, bitstream); - } - if (isRegisteredBitstream(bitstream.getInternalId())) { context.uncacheEntity(bitstream); - continue; // do not delete registered bitstreams } + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); - // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on - // another place - if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(bitstream.getStoreNumber()).remove(bitstream); - - String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); - if (log.isDebugEnabled()) { - log.debug(message); - } - if (verbose) { - System.out.println(message); - } - } + cleanedBitstreamCount = cleanedBitstreamCount + storage.size(); - // Make sure to commit our outstanding work every 100 - // iterations. Otherwise you risk losing the entire transaction - // if we hit an exception, which isn't useful at all for large - // amounts of bitstreams. - commitCounter++; - if (commitCounter % 100 == 0) { - context.dispatchEvents(); - // Commit actual changes to DB after dispatch events - System.out.print("Performing incremental commit to the database..."); - context.commit(); - System.out.println(" Incremental commit done!"); + if (!deleteDbRecords) { + offset = offset + limit; } - context.uncacheEntity(bitstream); } System.out.print("Committing changes to the database..."); @@ -332,13 +339,11 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio @Nullable @Override public Long getLastModified(Bitstream bitstream) throws IOException { - Map attrs = new HashMap(); - attrs.put("modified", null); - attrs = this.getStore(bitstream.getStoreNumber()).about(bitstream, attrs); - if (attrs == null || !attrs.containsKey("modified")) { + Map metadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, List.of("modified")); + if (metadata == null || !metadata.containsKey("modified")) { return null; } - return Long.valueOf(attrs.get("modified").toString()); + return Long.valueOf(metadata.get("modified").toString()); } /** @@ -483,7 +488,7 @@ protected boolean isRecent(Long lastModified) { return (now - lastModified) < (1 * 60 * 1000); } - protected BitStoreService getStore(int position) throws IOException { + public BitStoreService getStore(int position) throws IOException { BitStoreService bitStoreService = this.stores.get(position); if (!bitStoreService.isInitialized()) { bitStoreService.init(); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java index 1fdf1e84e115..6fef7365e482 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java @@ -15,6 +15,7 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Map; import org.apache.logging.log4j.Logger; @@ -126,13 +127,13 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { /** * Obtain technical metadata about an asset in the asset store. * - * @param bitstream The asset to describe - * @param attrs A Map whose keys consist of desired metadata fields - * @return attrs - * A Map with key/value pairs of desired metadata - * @throws java.io.IOException If a problem occurs while obtaining metadata + * @param bitstream The asset to describe + * @param attrs A List of desired metadata fields + * @return attrs A Map with key/value pairs of desired metadata + * @throws java.io.IOException If a problem occurs while obtaining + * metadata */ - public Map about(Bitstream bitstream, Map attrs) throws IOException { + public Map about(Bitstream bitstream, List attrs) throws IOException { try { // potentially expensive, since it may calculate the checksum File file = getFile(bitstream); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java new file mode 100644 index 000000000000..62c24544eeac --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java @@ -0,0 +1,42 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * When inputstream closes, then delete the file + * http://stackoverflow.com/a/4694155/368581 + */ +public class DeleteOnCloseFileInputStream extends FileInputStream { + + private File file; + + public DeleteOnCloseFileInputStream(String fileName) throws FileNotFoundException { + this(new File(fileName)); + } + + public DeleteOnCloseFileInputStream(File file) throws FileNotFoundException { + super(file); + this.file = file; + } + + public void close() throws IOException { + try { + super.close(); + } finally { + if (file != null) { + file.delete(); + file = null; + } + } + } +} diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index 1ad4c33f8213..be20f019fefd 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -7,10 +7,19 @@ */ package org.dspace.storage.bitstore; +import static java.lang.String.valueOf; + import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Supplier; import javax.validation.constraints.NotNull; @@ -22,12 +31,11 @@ import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.transfer.Download; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.TransferManagerBuilder; import com.amazonaws.services.s3.transfer.Upload; @@ -37,7 +45,7 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.io.FileUtils; +import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpStatus; import org.apache.logging.log4j.LogManager; @@ -73,7 +81,7 @@ public class S3BitStoreService extends BaseBitStoreService { /** * Checksum algorithm */ - protected static final String CSA = "MD5"; + static final String CSA = "MD5"; // These settings control the way an identifier is hashed into // directory and file names @@ -160,13 +168,11 @@ public S3BitStoreService() {} /** * This constructor is used for test purpose. - * In this way is possible to use a mocked instance of AmazonS3 * - * @param s3Service mocked AmazonS3 service + * @param s3Service AmazonS3 service */ - protected S3BitStoreService(AmazonS3 s3Service, TransferManager tm) { + protected S3BitStoreService(AmazonS3 s3Service) { this.s3Service = s3Service; - this.tm = tm; } @Override @@ -283,9 +289,16 @@ public InputStream get(Bitstream bitstream) throws IOException { key = key.substring(REGISTERED_FLAG.length()); } try { - S3Object object = s3Service.getObject(new GetObjectRequest(bucketName, key)); - return (object != null) ? object.getObjectContent() : null; - } catch (AmazonClientException e) { + File tempFile = File.createTempFile("s3-disk-copy-" + UUID.randomUUID(), "temp"); + tempFile.deleteOnExit(); + + GetObjectRequest getObjectRequest = new GetObjectRequest(bucketName, key); + + Download download = tm.download(getObjectRequest, tempFile); + download.waitForCompletion(); + + return new DeleteOnCloseFileInputStream(tempFile); + } catch (AmazonClientException | InterruptedException e) { log.error("get(" + key + ")", e); throw new IOException(e); } @@ -307,24 +320,30 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { String key = getFullKey(bitstream.getInternalId()); //Copy istream to temp file, and send the file, with some metadata File scratchFile = File.createTempFile(bitstream.getInternalId(), "s3bs"); - try { - FileUtils.copyInputStreamToFile(in, scratchFile); - long contentLength = scratchFile.length(); - // The ETag may or may not be and MD5 digest of the object data. - // Therefore, we precalculate before uploading - String localChecksum = org.dspace.curate.Utils.checksum(scratchFile, CSA); + try ( + FileOutputStream fos = new FileOutputStream(scratchFile); + // Read through a digest input stream that will work out the MD5 + DigestInputStream dis = new DigestInputStream(in, MessageDigest.getInstance(CSA)); + ) { + Utils.bufferedCopy(dis, fos); + in.close(); Upload upload = tm.upload(bucketName, key, scratchFile); upload.waitForUploadResult(); - bitstream.setSizeBytes(contentLength); - bitstream.setChecksum(localChecksum); + bitstream.setSizeBytes(scratchFile.length()); + // we cannot use the S3 ETAG here as it could be not a MD5 in case of multipart upload (large files) or if + // the bucket is encrypted + bitstream.setChecksum(Utils.toHex(dis.getMessageDigest().digest())); bitstream.setChecksumAlgorithm(CSA); } catch (AmazonClientException | IOException | InterruptedException e) { log.error("put(" + bitstream.getInternalId() + ", is)", e); throw new IOException(e); + } catch (NoSuchAlgorithmException nsae) { + // Should never happen + log.warn("Caught NoSuchAlgorithmException", nsae); } finally { if (!scratchFile.delete()) { scratchFile.deleteOnExit(); @@ -339,61 +358,56 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { * (Does not use getContentMD5, as that is 128-bit MD5 digest calculated on caller's side) * * @param bitstream The asset to describe - * @param attrs A Map whose keys consist of desired metadata fields + * @param attrs A List of desired metadata fields * @return attrs * A Map with key/value pairs of desired metadata * If file not found, then return null * @throws java.io.IOException If a problem occurs while obtaining metadata */ @Override - public Map about(Bitstream bitstream, Map attrs) throws IOException { + public Map about(Bitstream bitstream, List attrs) throws IOException { + String key = getFullKey(bitstream.getInternalId()); // If this is a registered bitstream, strip the -R prefix before retrieving if (isRegisteredBitstream(key)) { key = key.substring(REGISTERED_FLAG.length()); } + + Map metadata = new HashMap<>(); + try { + ObjectMetadata objectMetadata = s3Service.getObjectMetadata(bucketName, key); if (objectMetadata != null) { - return this.about(objectMetadata, attrs); + putValueIfExistsKey(attrs, metadata, "size_bytes", objectMetadata.getContentLength()); + putValueIfExistsKey(attrs, metadata, "modified", valueOf(objectMetadata.getLastModified().getTime())); + } + + putValueIfExistsKey(attrs, metadata, "checksum_algorithm", CSA); + + if (attrs.contains("checksum")) { + try (InputStream in = get(bitstream); + DigestInputStream dis = new DigestInputStream(in, MessageDigest.getInstance(CSA)) + ) { + Utils.copy(dis, NullOutputStream.NULL_OUTPUT_STREAM); + byte[] md5Digest = dis.getMessageDigest().digest(); + metadata.put("checksum", Utils.toHex(md5Digest)); + } catch (NoSuchAlgorithmException nsae) { + // Should never happen + log.warn("Caught NoSuchAlgorithmException", nsae); + } } + + return metadata; } catch (AmazonS3Exception e) { if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND) { - return null; + return metadata; } } catch (AmazonClientException e) { log.error("about(" + key + ", attrs)", e); throw new IOException(e); } - return null; - } - - /** - * Populates map values by checking key existence - *
- * Adds technical metadata about an asset in the asset store, like: - *

- * - * @param objectMetadata containing technical data - * @param attrs map with keys populated - * @return Map of enriched attrs with values - */ - public Map about(ObjectMetadata objectMetadata, Map attrs) { - if (objectMetadata != null) { - this.putValueIfExistsKey(attrs, SIZE_BYTES, objectMetadata.getContentLength()); - - // put CHECKSUM_ALGORITHM if exists CHECKSUM - this.putValueIfExistsKey(attrs, CHECKSUM, objectMetadata.getETag()); - this.putEntryIfExistsKey(attrs, CHECKSUM, Map.entry(CHECKSUM_ALGORITHM, CSA)); - - this.putValueIfExistsKey(attrs, MODIFIED, String.valueOf(objectMetadata.getLastModified().getTime())); - } - return attrs; + return metadata; } /** @@ -478,22 +492,6 @@ public void setAwsAccessKey(String awsAccessKey) { this.awsAccessKey = awsAccessKey; } - @Autowired(required = true) - public void setPathStyleAccessEnabled(boolean pathStyleAccessEnabled) { - this.pathStyleAccessEnabled = pathStyleAccessEnabled; - } - - public boolean getPathStyleAccessEnabled() { - return this.pathStyleAccessEnabled; - } - public void setEndpoint(String endpoint) { - this.endpoint = endpoint; - } - - public String getEndpoint() { - return this.endpoint; - } - public String getAwsSecretKey() { return awsSecretKey; } @@ -536,6 +534,22 @@ public void setUseRelativePath(boolean useRelativePath) { this.useRelativePath = useRelativePath; } + public String getEndpoint() { + return endpoint; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public boolean getPathStyleAccessEnabled() { + return pathStyleAccessEnabled; + } + + public void setPathStyleAccessEnabled(boolean pathStyleAccessEnabled) { + this.pathStyleAccessEnabled = pathStyleAccessEnabled; + } + /** * Contains a command-line testing tool. Expects arguments: * -a accessKey -s secretKey -f assetFileName @@ -573,13 +587,14 @@ public static void main(String[] args) throws Exception { String accessKey = command.getOptionValue("a"); String secretKey = command.getOptionValue("s"); - String assetFile = command.getOptionValue("f"); S3BitStoreService store = new S3BitStoreService(); AWSCredentials awsCredentials = new BasicAWSCredentials(accessKey, secretKey); - store.s3Service = new AmazonS3Client(awsCredentials); + store.s3Service = AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(awsCredentials)) + .build(); //Todo configurable region Region usEast1 = Region.getRegion(Regions.US_EAST_1); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java index e48487955209..d2266f02d75c 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java @@ -10,13 +10,12 @@ import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.UUID; import javax.annotation.Nullable; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -41,7 +40,7 @@ public class SyncBitstreamStorageServiceImpl extends BitstreamStorageServiceImpl private static final Logger log = LogManager.getLogger(); private boolean syncEnabled = false; - private static final int SYNCHRONIZED_STORES_NUMBER = 77; + public static final int SYNCHRONIZED_STORES_NUMBER = 77; @Autowired ConfigurationService configurationService; @@ -136,12 +135,9 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, } bitstreamService.update(context, bitstream); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); - Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); if (MapUtils.isEmpty(receivedMetadata)) { String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; log.error(message); @@ -172,13 +168,20 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, @Override public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { - Map wantedMetadata = new HashMap(); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - int storeNumber = this.whichStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); - return receivedMetadata; + return this.getStore(storeNumber).about(bitstream, List.of("checksum", "checksum_algorithm")); + } + + /** + * Compute the checksum of a bitstream in a specific store. + * @param context DSpace Context object + * @param bitstream Bitstream to compute checksum for + * @param storeNumber Store number to compute checksum for + * @return Map with checksum and checksum algorithm + * @throws IOException if IO error + */ + public Map computeChecksumSpecStore(Context context, Bitstream bitstream, int storeNumber) throws IOException { + return this.getStore(storeNumber).about(bitstream, List.of("checksum", "checksum_algorithm")); } @Override @@ -191,27 +194,62 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); - int commitCounter = 0; + + int offset = 0; + int limit = 100; + + int cleanedBitstreamCount = 0; + + int deletedBitstreamCount = bitstreamService.countDeletedBitstreams(context); + System.out.println("Found " + deletedBitstreamCount + " deleted bistream to cleanup"); try { context.turnOffAuthorisationSystem(); - List storage = bitstreamService.findDeletedBitstreams(context); - for (Bitstream bitstream : storage) { - UUID bid = bitstream.getID(); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("modified", null); + while (cleanedBitstreamCount < deletedBitstreamCount) { + + List storage = bitstreamService.findDeletedBitstreams(context, limit, offset); - int storeNumber = this.whichStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + if (CollectionUtils.isEmpty(storage)) { + break; + } + + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + List wantedMetadata = List.of("size_bytes", "modified"); + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber) + .about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + // This is a small chance that this is a file which is + // being stored -- get it next time. + if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } - // Make sure entries which do not exist are removed - if (MapUtils.isEmpty(receivedMetadata)) { - log.debug("bitstore.about is empty, so file is not present"); if (deleteDbRecords) { - log.debug("deleting record"); + log.debug("deleting db record"); if (verbose) { System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); } @@ -221,64 +259,42 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio } bitstreamService.expunge(context, bitstream); } - context.uncacheEntity(bitstream); - continue; - } - - // This is a small chance that this is a file which is - // being stored -- get it next time. - if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { - log.debug("file is recent"); - context.uncacheEntity(bitstream); - continue; - } - if (deleteDbRecords) { - log.debug("deleting db record"); - if (verbose) { - System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams } - checksumHistoryService.deleteByBitstream(context, bitstream); - if (verbose) { - System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + + + // Since versioning allows for multiple bitstreams, check if the internal + // identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(storeNumber).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } } - bitstreamService.expunge(context, bitstream); - } - if (isRegisteredBitstream(bitstream.getInternalId())) { context.uncacheEntity(bitstream); - continue; // do not delete registered bitstreams } + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); - // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on - // another place - if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(storeNumber).remove(bitstream); - - String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); - if (log.isDebugEnabled()) { - log.debug(message); - } - if (verbose) { - System.out.println(message); - } - } + cleanedBitstreamCount = cleanedBitstreamCount + storage.size(); - // Make sure to commit our outstanding work every 100 - // iterations. Otherwise you risk losing the entire transaction - // if we hit an exception, which isn't useful at all for large - // amounts of bitstreams. - commitCounter++; - if (commitCounter % 100 == 0) { - context.dispatchEvents(); - // Commit actual changes to DB after dispatch events - System.out.print("Performing incremental commit to the database..."); - context.commit(); - System.out.println(" Incremental commit done!"); + if (!deleteDbRecords) { + offset = offset + limit; } - context.uncacheEntity(bitstream); } System.out.print("Committing changes to the database..."); @@ -301,14 +317,12 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio @Nullable @Override public Long getLastModified(Bitstream bitstream) throws IOException { - Map attrs = new HashMap(); - attrs.put("modified", null); int storeNumber = this.whichStoreNumber(bitstream); - attrs = this.getStore(storeNumber).about(bitstream, attrs); - if (attrs == null || !attrs.containsKey("modified")) { + Map metadata = this.getStore(storeNumber).about(bitstream, List.of("modified")); + if (metadata == null || !metadata.containsKey("modified")) { return null; } - return Long.valueOf(attrs.get("modified").toString()); + return Long.valueOf(metadata.get("modified").toString()); } /** @@ -320,11 +334,44 @@ public Long getLastModified(Bitstream bitstream) throws IOException { * @return store number */ public int whichStoreNumber(Bitstream bitstream) { - if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { + if (isBitstreamStoreSynchronized(bitstream)) { return getIncoming(); } else { return bitstream.getStoreNumber(); } } + /** + * Check if the bitstream is synchronized (stored in more stores) + * The bitstream is synchronized if it has the static store number. + * + * @param bitstream to check if it is synchronized + * @return true if the bitstream is synchronized + */ + public boolean isBitstreamStoreSynchronized(Bitstream bitstream) { + return bitstream.getStoreNumber() == SYNCHRONIZED_STORES_NUMBER; + } + + + /** + * Get the store number where the bitstream is synchronized. It is not active (incoming) store. + * + * @param bitstream to get the synchronized store number + * @return store number + */ + public int getSynchronizedStoreNumber(Bitstream bitstream) { + int storeNumber = -1; + if (!isBitstreamStoreSynchronized(bitstream)) { + storeNumber = bitstream.getStoreNumber(); + } + + for (Map.Entry storeEntry : getStores().entrySet()) { + if (storeEntry.getKey() == SYNCHRONIZED_STORES_NUMBER || storeEntry.getKey() == getIncoming()) { + continue; + } + storeNumber = storeEntry.getKey(); + } + return storeNumber; + } + } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java index be954557fc44..e0ce37802e70 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java @@ -8,6 +8,7 @@ package org.dspace.storage.bitstore.factory; import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; /** @@ -20,6 +21,8 @@ public abstract class StorageServiceFactory { public abstract BitstreamStorageService getBitstreamStorageService(); + public abstract SyncBitstreamStorageServiceImpl getSyncBitstreamStorageService(); + public static StorageServiceFactory getInstance() { return DSpaceServicesFactory.getInstance().getServiceManager() .getServiceByName("storageServiceFactory", StorageServiceFactory.class); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java index 0dc67223d830..a44a4fbae73e 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore.factory; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.annotation.Autowired; @@ -20,9 +21,17 @@ public class StorageServiceFactoryImpl extends StorageServiceFactory { @Autowired(required = true) private BitstreamStorageService bitstreamStorageService; + @Autowired(required = true) + private SyncBitstreamStorageServiceImpl syncBitstreamStorageService; + @Override public BitstreamStorageService getBitstreamStorageService() { return bitstreamStorageService; } + + @Override + public SyncBitstreamStorageServiceImpl getSyncBitstreamStorageService() { + return syncBitstreamStorageService; + } } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java index 209ef5d16be6..7f5ed8f9129f 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java @@ -102,7 +102,7 @@ public interface BitstreamStorageService { public UUID register(Context context, Bitstream bitstream, int assetstore, String bitstreamPath) throws SQLException, IOException, AuthorizeException; - public Map computeChecksum(Context context, Bitstream bitstream) throws IOException; + public Map computeChecksum(Context context, Bitstream bitstream) throws IOException; /** * Does the internal_id column in the bitstream row indicate the bitstream diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql index b336010262f0..86e90bad2ad9 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql @@ -487,4 +487,11 @@ ALTER COLUMN element TYPE character varying(128); ALTER TABLE eperson ADD welcome_info varchar(30); -ALTER TABLE eperson ADD can_edit_submission_metadata BOOL; \ No newline at end of file +ALTER TABLE eperson ADD can_edit_submission_metadata BOOL; + +insert into checksum_results +values +( + 'CHECKSUM_SYNC_NO_MATCH', + 'The checksum value from S3 is not matching the checksum value from the local file system' +); diff --git a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java new file mode 100644 index 000000000000..7aae1cf2719c --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java @@ -0,0 +1,434 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import static com.amazonaws.regions.Regions.DEFAULT_REGION; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.dspace.storage.bitstore.S3BitStoreService.CSA; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.startsWith; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.Bucket; +import com.amazonaws.services.s3.model.ObjectMetadata; +import io.findify.s3mock.S3Mock; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.app.matcher.LambdaMatcher; +import org.dspace.authorize.AuthorizeException; +import org.dspace.builder.BitstreamBuilder; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.content.Bitstream; +import org.dspace.content.Collection; +import org.dspace.content.Item; +import org.dspace.core.Utils; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + + +/** + * @author Luca Giamminonni (luca.giamminonni at 4science.com) + */ +public class S3BitStoreServiceIT extends AbstractIntegrationTestWithDatabase { + + private static final String DEFAULT_BUCKET_NAME = "dspace-asset-localhost"; + + private S3BitStoreService s3BitStoreService; + + private AmazonS3 amazonS3Client; + + private S3Mock s3Mock; + + private Collection collection; + + private File s3Directory; + + @Before + public void setup() throws Exception { + + s3Directory = new File(System.getProperty("java.io.tmpdir"), "s3"); + + s3Mock = S3Mock.create(8001, s3Directory.getAbsolutePath()); + s3Mock.start(); + + amazonS3Client = createAmazonS3Client(); + + s3BitStoreService = new S3BitStoreService(amazonS3Client); + + context.turnOffAuthorisationSystem(); + + parentCommunity = CommunityBuilder.createCommunity(context) + .build(); + + collection = CollectionBuilder.createCollection(context, parentCommunity) + .build(); + + context.restoreAuthSystemState(); + } + + @After + public void cleanUp() throws IOException { + FileUtils.deleteDirectory(s3Directory); + s3Mock.shutdown(); + } + + @Test + public void testBitstreamPutAndGetWithAlreadyPresentBucket() throws IOException { + + String bucketName = "testbucket"; + + amazonS3Client.createBucket(bucketName); + + s3BitStoreService.setBucketName(bucketName); + s3BitStoreService.init(); + + assertThat(amazonS3Client.listBuckets(), contains(bucketNamed(bucketName))); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + assertThat(bitstream.getSizeBytes(), is((long) content.length())); + assertThat(bitstream.getChecksum(), is(expectedChecksum)); + assertThat(bitstream.getChecksumAlgorithm(), is(CSA)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(bucketName, key); + assertThat(objectMetadata.getContentMD5(), is(expectedChecksum)); + + } + + @Test + public void testBitstreamPutAndGetWithoutSpecifingBucket() throws IOException { + + s3BitStoreService.init(); + + assertThat(s3BitStoreService.getBucketName(), is(DEFAULT_BUCKET_NAME)); + + assertThat(amazonS3Client.listBuckets(), contains(bucketNamed(DEFAULT_BUCKET_NAME))); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + assertThat(bitstream.getSizeBytes(), is((long) content.length())); + assertThat(bitstream.getChecksum(), is(expectedChecksum)); + assertThat(bitstream.getChecksumAlgorithm(), is(CSA)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(DEFAULT_BUCKET_NAME, key); + assertThat(objectMetadata.getContentMD5(), is(expectedChecksum)); + + } + + @Test + public void testBitstreamPutAndGetWithSubFolder() throws IOException { + + s3BitStoreService.setSubfolder("test/DSpace7/"); + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + assertThat(key, startsWith("test/DSpace7/")); + + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(DEFAULT_BUCKET_NAME, key); + assertThat(objectMetadata, notNullValue()); + + } + + @Test + public void testBitstreamDeletion() throws IOException { + + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + assertThat(s3BitStoreService.get(bitstream), notNullValue()); + + s3BitStoreService.remove(bitstream); + + IOException exception = assertThrows(IOException.class, () -> s3BitStoreService.get(bitstream)); + assertThat(exception.getCause(), instanceOf(AmazonS3Exception.class)); + assertThat(((AmazonS3Exception) exception.getCause()).getStatusCode(), is(404)); + + } + + @Test + public void testAbout() throws IOException { + + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + Map about = s3BitStoreService.about(bitstream, List.of()); + assertThat(about.size(), is(0)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about.size(), is(1)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about.size(), is(2)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified", "checksum")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about, hasEntry("checksum", expectedChecksum)); + assertThat(about.size(), is(3)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified", "checksum", "checksum_algorithm")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about, hasEntry("checksum", expectedChecksum)); + assertThat(about, hasEntry("checksum_algorithm", CSA)); + assertThat(about.size(), is(4)); + + } + + @Test + public void handleRegisteredIdentifierPrefixInS3() { + String trueBitStreamId = "012345"; + String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + trueBitStreamId; + // Should be detected as registered bitstream + assertTrue(this.s3BitStoreService.isRegisteredBitstream(registeredBitstreamId)); + } + + @Test + public void stripRegisteredBitstreamPrefixWhenCalculatingPath() { + // Set paths and IDs + String s3Path = "UNIQUE_S3_PATH/test/bitstream.pdf"; + String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + s3Path; + // Paths should be equal, since the getRelativePath method should strip the registered -R prefix + String relativeRegisteredPath = this.s3BitStoreService.getRelativePath(registeredBitstreamId); + assertEquals(s3Path, relativeRegisteredPath); + } + + @Test + public void givenBitStreamIdentifierLongerThanPossibleWhenIntermediatePathIsComputedThenIsSplittedAndTruncated() { + String path = "01234567890123456789"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenBitStreamIdentifierShorterThanAFolderLengthWhenIntermediatePathIsComputedThenIsSingleFolder() { + String path = "0"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "0" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenPartialBitStreamIdentifierWhenIntermediatePathIsComputedThenIsCompletlySplitted() { + String path = "01234"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "4" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenMaxLengthBitStreamIdentifierWhenIntermediatePathIsComputedThenIsSplittedAllAsSubfolder() { + String path = "012345"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenBitStreamIdentifierWhenIntermediatePathIsComputedThenNotEndingDoubleSlash() throws IOException { + StringBuilder path = new StringBuilder("01"); + String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + int slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("2"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("3"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("4"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("56789"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + } + + @Test + public void givenBitStreamIdentidierWhenIntermediatePathIsComputedThenMustBeSplitted() throws IOException { + StringBuilder path = new StringBuilder("01"); + String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + int slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("2"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("3"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("4"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("56789"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + } + + @Test + public void givenBitStreamIdentifierWithSlashesWhenSanitizedThenSlashesMustBeRemoved() { + String sInternalId = new StringBuilder("01") + .append(File.separator) + .append("22") + .append(File.separator) + .append("33") + .append(File.separator) + .append("4455") + .toString(); + String computedPath = this.s3BitStoreService.sanitizeIdentifier(sInternalId); + assertThat(computedPath, Matchers.not(Matchers.startsWith(File.separator))); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator))); + assertThat(computedPath, Matchers.not(Matchers.containsString(File.separator))); + } + + private byte[] generateChecksum(String content) { + try { + MessageDigest m = MessageDigest.getInstance("MD5"); + m.update(content.getBytes()); + return m.digest(); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + private AmazonS3 createAmazonS3Client() { + return AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .withEndpointConfiguration(new EndpointConfiguration("http://127.0.0.1:8001", DEFAULT_REGION.getName())) + .build(); + } + + private Item createItem() { + return ItemBuilder.createItem(context, collection) + .withTitle("Test item") + .build(); + } + + private Bitstream createBitstream(String content) { + try { + return BitstreamBuilder + .createBitstream(context, createItem(), toInputStream(content)) + .build(); + } catch (SQLException | AuthorizeException | IOException e) { + throw new RuntimeException(e); + } + } + + private Matcher bucketNamed(String name) { + return LambdaMatcher.matches(bucket -> bucket.getName().equals(name)); + } + + private InputStream toInputStream(String content) { + return IOUtils.toInputStream(content, UTF_8); + } + + private int computeSlashes(String internalId) { + int minimum = internalId.length(); + int slashesPerLevel = minimum / S3BitStoreService.digitsPerLevel; + int odd = Math.min(1, minimum % S3BitStoreService.digitsPerLevel); + int slashes = slashesPerLevel + odd; + return Math.min(slashes, S3BitStoreService.directoryLevels); + } + +} diff --git a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java deleted file mode 100644 index 3a5141f2272c..000000000000 --- a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java +++ /dev/null @@ -1,481 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.storage.bitstore; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.isEmptyOrNullString; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThrows; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.startsWith; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.function.Supplier; -import java.util.regex.Pattern; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; -import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.Upload; -import com.amazonaws.services.s3.transfer.model.UploadResult; -import org.apache.commons.io.FileUtils; -import org.dspace.AbstractUnitTest; -import org.dspace.content.Bitstream; -import org.dspace.curate.Utils; -import org.hamcrest.Matchers; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentMatchers; -import org.mockito.Mock; -import org.mockito.MockedStatic; -import org.mockito.Mockito; - - - - -/** - * @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com) - * - */ -public class S3BitStoreServiceTest extends AbstractUnitTest { - - private S3BitStoreService s3BitStoreService; - - @Mock - private AmazonS3Client s3Service; - - @Mock - private TransferManager tm; - - @Mock - private Bitstream bitstream; - - @Mock - private Bitstream externalBitstream; - - @Before - public void setUp() throws Exception { - this.s3BitStoreService = new S3BitStoreService(s3Service, tm); - } - - private Supplier mockedServiceSupplier() { - return () -> this.s3Service; - } - - @Test - public void givenBucketWhenInitThenUsesSameBucket() throws IOException { - String bucketName = "Bucket0"; - s3BitStoreService.setBucketName(bucketName); - when(this.s3Service.doesBucketExistV2(bucketName)).thenReturn(false); - - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - - this.s3BitStoreService.init(); - - verify(this.s3Service).doesBucketExistV2(bucketName); - verify(this.s3Service, Mockito.times(1)).createBucket(bucketName); - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenEmptyBucketWhenInitThenUsesDefaultBucket() throws IOException { - assertThat(s3BitStoreService.getBucketName(), isEmptyOrNullString()); - when(this.s3Service.doesBucketExistV2(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX))).thenReturn(false); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - - this.s3BitStoreService.init(); - - verify(this.s3Service, Mockito.times(1)).createBucket(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getBucketName(), Matchers.startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenAccessKeysWhenInitThenVerifiesCorrectBuilderCreation() throws IOException { - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getBucketName(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - when(this.s3Service.doesBucketExistV2(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX))).thenReturn(false); - - final String awsAccessKey = "ACCESS_KEY"; - final String awsSecretKey = "SECRET_KEY"; - - this.s3BitStoreService.setAwsAccessKey(awsAccessKey); - this.s3BitStoreService.setAwsSecretKey(awsSecretKey); - - try (MockedStatic mockedS3BitStore = Mockito.mockStatic(S3BitStoreService.class)) { - mockedS3BitStore - .when(() -> - S3BitStoreService.amazonClientBuilderBy( - ArgumentMatchers.any(Regions.class), - ArgumentMatchers.argThat( - credentials -> - awsAccessKey.equals(credentials.getAWSAccessKeyId()) && - awsSecretKey.equals(credentials.getAWSSecretKey()) - ) - ) - ) - .thenReturn(this.mockedServiceSupplier()); - - this.s3BitStoreService.init(); - - mockedS3BitStore.verify( - () -> - S3BitStoreService.amazonClientBuilderBy( - ArgumentMatchers.any(Regions.class), - ArgumentMatchers.argThat( - credentials -> - awsAccessKey.equals(credentials.getAWSAccessKeyId()) && - awsSecretKey.equals(credentials.getAWSSecretKey()) - ) - ) - ); - } - - - verify(this.s3Service, Mockito.times(1)).createBucket(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getBucketName(), Matchers.startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getAwsAccessKey(), Matchers.equalTo(awsAccessKey)); - assertThat(s3BitStoreService.getAwsSecretKey(), Matchers.equalTo(awsSecretKey)); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenBucketBitStreamIdInputStreamWhenRetrievingFromS3ThenUsesBucketBitStreamId() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - S3Object object = Mockito.mock(S3Object.class); - S3ObjectInputStream inputStream = Mockito.mock(S3ObjectInputStream.class); - when(object.getObjectContent()).thenReturn(inputStream); - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(object); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.equalTo(inputStream)); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.contentEquals(request.getBucketName()) && - bitStreamId.contentEquals(request.getKey()) - ) - ); - - } - - @Test - public void givenBucketBitStreamIdWhenNothingFoundOnS3ThenReturnsNull() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(null); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.nullValue()); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.contentEquals(request.getBucketName()) && - bitStreamId.contentEquals(request.getKey()) - ) - ); - - } - - @Test - public void givenSubFolderWhenRequestsItemFromS3ThenTheIdentifierShouldHaveProperPath() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "012345"; - String subfolder = "/test/DSpace7/"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - this.s3BitStoreService.setSubfolder(subfolder); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - S3Object object = Mockito.mock(S3Object.class); - S3ObjectInputStream inputStream = Mockito.mock(S3ObjectInputStream.class); - when(object.getObjectContent()).thenReturn(inputStream); - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(object); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.equalTo(inputStream)); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.equals(request.getBucketName()) && - request.getKey().startsWith(subfolder) && - request.getKey().contains(bitStreamId) && - !request.getKey().contains(File.separator + File.separator) - ) - ); - - } - - @Test - public void handleRegisteredIdentifierPrefixInS3() { - String trueBitStreamId = "012345"; - String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + trueBitStreamId; - // Should be detected as registered bitstream - assertTrue(this.s3BitStoreService.isRegisteredBitstream(registeredBitstreamId)); - } - - @Test - public void stripRegisteredBitstreamPrefixWhenCalculatingPath() { - // Set paths and IDs - String s3Path = "UNIQUE_S3_PATH/test/bitstream.pdf"; - String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + s3Path; - // Paths should be equal, since the getRelativePath method should strip the registered -R prefix - String relativeRegisteredPath = this.s3BitStoreService.getRelativePath(registeredBitstreamId); - assertEquals(s3Path, relativeRegisteredPath); - } - - @Test - public void givenBitStreamIdentifierLongerThanPossibleWhenIntermediatePathIsComputedThenIsSplittedAndTruncated() { - String path = "01234567890123456789"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenBitStreamIdentifierShorterThanAFolderLengthWhenIntermediatePathIsComputedThenIsSingleFolder() { - String path = "0"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "0" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenPartialBitStreamIdentifierWhenIntermediatePathIsComputedThenIsCompletlySplitted() { - String path = "01234"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "4" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenMaxLengthBitStreamIdentifierWhenIntermediatePathIsComputedThenIsSplittedAllAsSubfolder() { - String path = "012345"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenBitStreamIdentifierWhenIntermediatePathIsComputedThenNotEndingDoubleSlash() throws IOException { - StringBuilder path = new StringBuilder("01"); - String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - int slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); - assertThat(computedPath.split(Pattern.quote(File.separator)).length, Matchers.equalTo(slashes)); - - path.append("2"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("3"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("4"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("56789"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - } - - @Test - public void givenBitStreamIdentidierWhenIntermediatePathIsComputedThenMustBeSplitted() throws IOException { - StringBuilder path = new StringBuilder("01"); - String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - int slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("2"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("3"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("4"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("56789"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - } - - @Test - public void givenBitStreamIdentifierWithSlashesWhenSanitizedThenSlashesMustBeRemoved() { - String sInternalId = new StringBuilder("01") - .append(File.separator) - .append("22") - .append(File.separator) - .append("33") - .append(File.separator) - .append("4455") - .toString(); - String computedPath = this.s3BitStoreService.sanitizeIdentifier(sInternalId); - assertThat(computedPath, Matchers.not(Matchers.startsWith(File.separator))); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator))); - assertThat(computedPath, Matchers.not(Matchers.containsString(File.separator))); - } - - @Test - public void givenBitStreamWhenRemoveThenCallS3DeleteMethod() throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - this.s3BitStoreService.init(); - this.s3BitStoreService.remove(bitstream); - - verify(this.s3Service, Mockito.times(1)).deleteObject(ArgumentMatchers.eq(bucketName), - ArgumentMatchers.eq(bitStreamId)); - - } - - @Test - public void givenBitStreamWhenPutThenCallS3PutMethodAndStoresInBitStream() throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - File file = Mockito.mock(File.class); - InputStream in = Mockito.mock(InputStream.class); - PutObjectResult putObjectResult = Mockito.mock(PutObjectResult.class); - Upload upload = Mockito.mock(Upload.class); - UploadResult uploadResult = Mockito.mock(UploadResult.class); - when(upload.waitForUploadResult()).thenReturn(uploadResult); - String mockedTag = "1a7771d5fdd7bfdfc84033c70b1ba555"; - when(file.length()).thenReturn(8L); - try (MockedStatic fileMock = Mockito.mockStatic(File.class)) { - try (MockedStatic fileUtilsMock = Mockito.mockStatic(FileUtils.class)) { - try (MockedStatic curateUtils = Mockito.mockStatic(Utils.class)) { - curateUtils.when(() -> Utils.checksum((File) ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(mockedTag); - - fileMock - .when(() -> File.createTempFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(file); - - when(this.tm.upload(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(upload); - - this.s3BitStoreService.init(); - this.s3BitStoreService.put(bitstream, in); - } - } - - } - - verify(this.bitstream, Mockito.times(1)).setSizeBytes( - ArgumentMatchers.eq(8L) - ); - - verify(this.bitstream, Mockito.times(1)).setChecksum( - ArgumentMatchers.eq(mockedTag) - ); - - verify(this.tm, Mockito.times(1)).upload( - ArgumentMatchers.eq(bucketName), - ArgumentMatchers.eq(bitStreamId), - ArgumentMatchers.eq(file) - ); - - verify(file, Mockito.times(1)).delete(); - - } - - @Test - public void givenBitStreamWhenCallingPutFileCopyingThrowsIOExceptionPutThenFileIsRemovedAndStreamClosed() - throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - File file = Mockito.mock(File.class); - InputStream in = Mockito.mock(InputStream.class); - try (MockedStatic fileMock = Mockito.mockStatic(File.class)) { - try (MockedStatic fileUtilsMock = Mockito.mockStatic(FileUtils.class)) { - fileUtilsMock - .when(() -> FileUtils.copyInputStreamToFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenThrow(IOException.class); - fileMock - .when(() -> File.createTempFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(file); - - this.s3BitStoreService.init(); - assertThrows(IOException.class, () -> this.s3BitStoreService.put(bitstream, in)); - } - - } - - verify(this.bitstream, Mockito.never()).setSizeBytes(ArgumentMatchers.any(Long.class)); - - verify(this.bitstream, Mockito.never()).setChecksum(ArgumentMatchers.any(String.class)); - - verify(this.s3Service, Mockito.never()).putObject(ArgumentMatchers.any(PutObjectRequest.class)); - - verify(file, Mockito.times(1)).delete(); - - } - - private int computeSlashes(String internalId) { - int minimum = internalId.length(); - int slashesPerLevel = minimum / S3BitStoreService.digitsPerLevel; - int odd = Math.min(1, minimum % S3BitStoreService.digitsPerLevel); - int slashes = slashesPerLevel + odd; - return Math.min(slashes, S3BitStoreService.directoryLevels); - } - -} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java new file mode 100644 index 000000000000..fbf24f4f8e74 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java @@ -0,0 +1,35 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.converter; + +import org.dspace.app.rest.model.BitstreamChecksum; +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.projection.Projection; +import org.springframework.stereotype.Component; + +/** + * Convert the BitstreamChecksum to appropriate REST data model + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@Component +public class BitstreamChecksumConverter implements DSpaceConverter { + @Override + public BitstreamChecksumRest convert(BitstreamChecksum modelObject, Projection projection) { + BitstreamChecksumRest bitstreamChecksumRest = new BitstreamChecksumRest(); + bitstreamChecksumRest.setActiveStore(modelObject.getActiveStore()); + bitstreamChecksumRest.setDatabaseChecksum(modelObject.getDatabaseChecksum()); + bitstreamChecksumRest.setSynchronizedStore(modelObject.getSynchronizedStore()); + return bitstreamChecksumRest; + } + + @Override + public Class getModelClass() { + return BitstreamChecksum.class; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java new file mode 100644 index 000000000000..3c3c494df134 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java @@ -0,0 +1,46 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +/** + * This class handles the checksums of a bitstream (local, S3, database) + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class BitstreamChecksum { + CheckSumRest activeStore = null; + CheckSumRest synchronizedStore = null; + CheckSumRest databaseChecksum = null; + + public BitstreamChecksum() { + } + + public CheckSumRest getActiveStore() { + return activeStore; + } + + public void setActiveStore(CheckSumRest activeStore) { + this.activeStore = activeStore; + } + + public CheckSumRest getSynchronizedStore() { + return synchronizedStore; + } + + public void setSynchronizedStore(CheckSumRest synchronizedStore) { + this.synchronizedStore = synchronizedStore; + } + + public CheckSumRest getDatabaseChecksum() { + return databaseChecksum; + } + + public void setDatabaseChecksum(CheckSumRest databaseChecksum) { + this.databaseChecksum = databaseChecksum; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java new file mode 100644 index 000000000000..8195d90bbdaf --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java @@ -0,0 +1,66 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * The BitstreamChecksum REST Resource. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class BitstreamChecksumRest extends RestAddressableModel implements RestModel { + private static final long serialVersionUID = -3415049466402327251L; + public static final String NAME = "bitstreamchecksum"; + CheckSumRest activeStore = null; + CheckSumRest synchronizedStore = null; + CheckSumRest databaseChecksum = null; + + public BitstreamChecksumRest() { + } + + @Override + @JsonProperty(access = JsonProperty.Access.READ_ONLY) + public String getType() { + return NAME; + } + + public CheckSumRest getActiveStore() { + return activeStore; + } + + public void setActiveStore(CheckSumRest activeStore) { + this.activeStore = activeStore; + } + + public CheckSumRest getSynchronizedStore() { + return synchronizedStore; + } + + public void setSynchronizedStore(CheckSumRest synchronizedStore) { + this.synchronizedStore = synchronizedStore; + } + + public CheckSumRest getDatabaseChecksum() { + return databaseChecksum; + } + + public void setDatabaseChecksum(CheckSumRest databaseChecksum) { + this.databaseChecksum = databaseChecksum; + } + + @Override + public String getCategory() { + return null; + } + + @Override + public Class getController() { + return null; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java index 232d96b044a0..a1c3156a01a9 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java @@ -27,6 +27,10 @@ @LinkRest( name = BitstreamRest.THUMBNAIL, method = "getThumbnail" + ), + @LinkRest( + name = BitstreamRest.CHECKSUM, + method = "getChecksum" ) }) public class BitstreamRest extends DSpaceObjectRest { @@ -37,6 +41,7 @@ public class BitstreamRest extends DSpaceObjectRest { public static final String BUNDLE = "bundle"; public static final String FORMAT = "format"; public static final String THUMBNAIL = "thumbnail"; + public static final String CHECKSUM = "checksum"; private String bundleName; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java new file mode 100644 index 000000000000..a83255d98491 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java @@ -0,0 +1,26 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.hateoas; + +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.model.hateoas.annotations.RelNameDSpaceResource; +import org.dspace.app.rest.utils.Utils; + +/** + * Bitstream Checksum Rest HAL Resource. The HAL Resource wraps the REST Resource + * adding support for the links and embedded resources + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@RelNameDSpaceResource(BitstreamChecksumRest.NAME) +public class BitstreamChecksumResource extends DSpaceResource { + + public BitstreamChecksumResource(BitstreamChecksumRest data, Utils utils) { + super(data, utils); + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java new file mode 100644 index 000000000000..77ad383624ab --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java @@ -0,0 +1,104 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.repository; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Map; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; + +import org.dspace.app.rest.model.BitstreamChecksum; +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.model.BitstreamRest; +import org.dspace.app.rest.model.CheckSumRest; +import org.dspace.app.rest.projection.Projection; +import org.dspace.content.Bitstream; +import org.dspace.content.service.BitstreamService; +import org.dspace.core.Context; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Pageable; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.stereotype.Component; + +/** + * Link repository for "checksum" subresource of an individual bitstream. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@Component(BitstreamRest.CATEGORY + "." + BitstreamRest.NAME + "." + BitstreamRest.CHECKSUM) +public class BitstreamCheckSumLinkRepository extends AbstractDSpaceRestRepository implements LinkRestRepository { + + @Autowired + BitstreamService bitstreamService; + + @Autowired + SyncBitstreamStorageServiceImpl syncBitstreamStorageService; + + @PreAuthorize("hasPermission(#bitstreamId, 'BITSTREAM', 'READ')") + public BitstreamChecksumRest getChecksum(@Nullable HttpServletRequest request, + UUID bitstreamId, + @Nullable Pageable optionalPageable, + Projection projection) { + try { + Context context = obtainContext(); + Bitstream bitstream = bitstreamService.find(context, bitstreamId); + if (bitstream == null) { + throw new ResourceNotFoundException("No such bitstream: " + bitstreamId); + } + + CheckSumRest activeStoreChecksum = new CheckSumRest(); + CheckSumRest databaseChecksum = new CheckSumRest(); + CheckSumRest synchronizedStoreChecksum = new CheckSumRest(); + + // Get the checksum from the active store + composeChecksumRest(activeStoreChecksum, syncBitstreamStorageService.computeChecksum(context, bitstream)); + // Get the checksum from the database + databaseChecksum.setCheckSumAlgorithm(bitstream.getChecksumAlgorithm()); + databaseChecksum.setValue(bitstream.getChecksum()); + + if (syncBitstreamStorageService.isBitstreamStoreSynchronized(bitstream)) { + // Get the checksum from the synchronized store + composeChecksumRest(synchronizedStoreChecksum, + syncBitstreamStorageService.computeChecksumSpecStore(context, bitstream, + syncBitstreamStorageService.getSynchronizedStoreNumber(bitstream))); + } + + BitstreamChecksum bitstreamChecksum = new BitstreamChecksum(); + bitstreamChecksum.setActiveStore(activeStoreChecksum); + bitstreamChecksum.setDatabaseChecksum(databaseChecksum); + bitstreamChecksum.setSynchronizedStore(synchronizedStoreChecksum); + + return converter.toRest(bitstreamChecksum, projection); + } catch (SQLException e) { + throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Compose the checksum rest object from the checksum map + */ + private void composeChecksumRest(CheckSumRest checksumRest, Map checksumMap) { + if (Objects.isNull(checksumMap)) { + return; + } + if (checksumMap.containsKey("checksum")) { + checksumRest.setValue(checksumMap.get("checksum").toString()); + } + + if (checksumMap.containsKey("checksum_algorithm")) { + checksumRest.setCheckSumAlgorithm(checksumMap.get("checksum_algorithm").toString()); + } + } +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java index 09dbdca505fe..b7ece99fed04 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java @@ -22,6 +22,7 @@ import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -1178,4 +1179,50 @@ public void closeInputStreamsDownloadWithCoverPage() throws Exception { Mockito.verify(inputStreamSpy, times(1)).close(); } + @Test + public void testChecksumLinkRepository() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community and one collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + + Collection col1 = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection 1").build(); + + //2. A public item with a bitstream + String bitstreamContent = "0123456789"; + + try (InputStream is = IOUtils.toInputStream(bitstreamContent, CharEncoding.UTF_8)) { + + Item publicItem1 = ItemBuilder.createItem(context, col1) + .withTitle("Public item 1") + .withIssueDate("2017-10-17") + .withAuthor("Smith, Donald").withAuthor("Doe, John") + .build(); + + bitstream = BitstreamBuilder + .createBitstream(context, publicItem1, is) + .withName("Test bitstream") + .withDescription("This is a bitstream to test range requests") + .withMimeType("text/plain") + .build(); + } + context.restoreAuthSystemState(); + + getClient() + .perform(get("/api/core/bitstreams/" + bitstream.getID() + "/checksum")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.activeStore.value", is(bitstream.getChecksum()))) + .andExpect(jsonPath("$.activeStore.checkSumAlgorithm", is(bitstream.getChecksumAlgorithm()))) + .andExpect(jsonPath("$.databaseChecksum.value", is(bitstream.getChecksum()))) + .andExpect(jsonPath("$.databaseChecksum.checkSumAlgorithm", + is(bitstream.getChecksumAlgorithm()))) + .andExpect(jsonPath("$.synchronizedStore.value", nullValue())) + .andExpect(jsonPath("$.synchronizedStore.checkSumAlgorithm", nullValue())); + + + } + } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java index 36fc2f2aa131..9c9c0513c182 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java @@ -102,7 +102,8 @@ public static Matcher matchFullEmbeds() { return matchEmbeds( "bundle", "format", - "thumbnail" + "thumbnail", + "checksum" ); } @@ -115,7 +116,8 @@ public static Matcher matchLinks(UUID uuid) { "content", "format", "self", - "thumbnail" + "thumbnail", + "checksum" ); }