From 906ddb4ca021320a0bc46e339d044e95f2072618 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 18 Dec 2023 16:19:21 +0100 Subject: [PATCH 01/12] The bitstream data are stored into the local store after uploading to S3 --- .../bitstore/ClarinS3BitStoreService.java | 74 +++++++++++++++++++ .../storage/bitstore/S3BitStoreService.java | 4 +- dspace/config/spring/api/bitstore.xml | 2 +- 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java new file mode 100644 index 000000000000..5ca5affe4cb3 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java @@ -0,0 +1,74 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import com.amazonaws.AmazonClientException; +import com.amazonaws.services.s3.transfer.Upload; +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.content.Bitstream; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Override of the S3BitStoreService to store all the data also in the local assetstore. 
+ * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class ClarinS3BitStoreService extends S3BitStoreService { + + /** + * log4j log + */ + private static final Logger log = LogManager.getLogger(ClarinS3BitStoreService.class); + + @Autowired(required = true) + DSBitStoreService dsBitStoreService; + + public ClarinS3BitStoreService() { + super(); + } + + @Override + public void put(Bitstream bitstream, InputStream in) throws IOException { + String key = getFullKey(bitstream.getInternalId()); + //Copy istream to temp file, and send the file, with some metadata + File scratchFile = File.createTempFile(bitstream.getInternalId(), "s3bs"); + try { + FileUtils.copyInputStreamToFile(in, scratchFile); + long contentLength = scratchFile.length(); + // The ETag may or may not be and MD5 digest of the object data. + // Therefore, we precalculate before uploading + String localChecksum = org.dspace.curate.Utils.checksum(scratchFile, CSA); + + Upload upload = tm.upload(getBucketName(), key, scratchFile); + + upload.waitForUploadResult(); + + bitstream.setSizeBytes(contentLength); + bitstream.setChecksum(localChecksum); + bitstream.setChecksumAlgorithm(CSA); + + // Upload file into local assetstore + File localFile = dsBitStoreService.getFile(bitstream); + FileUtils.copyFile(scratchFile, localFile); + + } catch (AmazonClientException | IOException | InterruptedException e) { + log.error("put(" + bitstream.getInternalId() + ", is)", e); + throw new IOException(e); + } finally { + if (!scratchFile.delete()) { + scratchFile.deleteOnExit(); + } + } + } +} diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index ad6c431aed9e..afae56e964cc 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -73,7 +73,7 @@ public class 
S3BitStoreService extends BaseBitStoreService { /** * Checksum algorithm */ - private static final String CSA = "MD5"; + protected static final String CSA = "MD5"; // These settings control the way an identifier is hashed into // directory and file names @@ -116,7 +116,7 @@ public class S3BitStoreService extends BaseBitStoreService { * S3 transfer manager * this is reused between put calls to use less resources for multiple uploads */ - private TransferManager tm = null; + protected TransferManager tm = null; private static final ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); diff --git a/dspace/config/spring/api/bitstore.xml b/dspace/config/spring/api/bitstore.xml index 1cf7d8f68a3c..255bfbf75da6 100644 --- a/dspace/config/spring/api/bitstore.xml +++ b/dspace/config/spring/api/bitstore.xml @@ -17,7 +17,7 @@ - + From 92757417f7ab1b599ef2b662d3b6580ed1a31f95 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 18 Dec 2023 16:33:28 +0100 Subject: [PATCH 02/12] The bitstream is removed from the S3 and local assetstore --- .../storage/bitstore/ClarinS3BitStoreService.java | 14 ++++++++++++++ .../dspace/storage/bitstore/S3BitStoreService.java | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java index 5ca5affe4cb3..8aeef77ae6fd 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java @@ -71,4 +71,18 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { } } } + + @Override + public void remove(Bitstream bitstream) throws IOException { + String key = getFullKey(bitstream.getInternalId()); + try { + // Remove file from S3 + s3Service.deleteObject(getBucketName(), key); + // Remove file 
from local assetstore + dsBitStoreService.remove(bitstream); + } catch (AmazonClientException e) { + log.error("remove(" + key + ")", e); + throw new IOException(e); + } + } } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index afae56e964cc..1ad4c33f8213 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -110,7 +110,7 @@ public class S3BitStoreService extends BaseBitStoreService { /** * S3 service */ - private AmazonS3 s3Service = null; + protected AmazonS3 s3Service = null; /** * S3 transfer manager From 5828022e14c27ac0913569043ef6bc3995332f79 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 18 Dec 2023 16:50:00 +0100 Subject: [PATCH 03/12] Store number is specific if all storages are synchronized. --- .../bitstore/BitstreamStorageServiceImpl.java | 11 ++++++++ .../bitstore/ClarinS3BitStoreService.java | 25 ++++++++++++++----- dspace/config/clarin-dspace.cfg | 4 +++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index b8a1a2e96ad4..f86296383dba 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -28,6 +28,7 @@ import org.dspace.content.service.BitstreamService; import org.dspace.core.Context; import org.dspace.core.Utils; +import org.dspace.services.ConfigurationService; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; @@ -64,11 +65,14 @@ public class 
BitstreamStorageServiceImpl implements BitstreamStorageService, Ini * log4j log */ private static final Logger log = LogManager.getLogger(); + private static final int SYNCHRONIZED_STORES_NUMBER = 77; @Autowired(required = true) protected BitstreamService bitstreamService; @Autowired(required = true) protected ChecksumHistoryService checksumHistoryService; + @Autowired(required = true) + protected ConfigurationService configurationService; /** * asset stores @@ -107,10 +111,17 @@ public UUID store(Context context, Bitstream bitstream, InputStream is) throws S * other method of working out where to put a new bitstream, here's * where it should go */ + boolean isEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); + if (isEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(incoming); + } bitstream.setStoreNumber(incoming); bitstream.setDeleted(true); bitstream.setInternalId(id); + BitStoreService store = this.getStore(incoming); //For efficiencies sake, PUT is responsible for setting bitstream size_bytes, checksum, and checksum_algorithm store.put(bitstream, is); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java index 8aeef77ae6fd..85bbc72a1ea5 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java @@ -17,6 +17,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.dspace.content.Bitstream; +import org.dspace.services.ConfigurationService; import org.springframework.beans.factory.annotation.Autowired; /** @@ -30,14 +31,23 @@ public class ClarinS3BitStoreService extends S3BitStoreService { * log4j log */ private static final Logger log = 
LogManager.getLogger(ClarinS3BitStoreService.class); + private boolean syncEnabled = false; @Autowired(required = true) DSBitStoreService dsBitStoreService; + @Autowired(required = true) + ConfigurationService configurationService; + public ClarinS3BitStoreService() { super(); } + public void init() throws IOException { + super.init(); + syncEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); + } + @Override public void put(Bitstream bitstream, InputStream in) throws IOException { String key = getFullKey(bitstream.getInternalId()); @@ -58,10 +68,11 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { bitstream.setChecksum(localChecksum); bitstream.setChecksumAlgorithm(CSA); - // Upload file into local assetstore - File localFile = dsBitStoreService.getFile(bitstream); - FileUtils.copyFile(scratchFile, localFile); - + if (syncEnabled) { + // Upload file into local assetstore + File localFile = dsBitStoreService.getFile(bitstream); + FileUtils.copyFile(scratchFile, localFile); + } } catch (AmazonClientException | IOException | InterruptedException e) { log.error("put(" + bitstream.getInternalId() + ", is)", e); throw new IOException(e); @@ -78,8 +89,10 @@ public void remove(Bitstream bitstream) throws IOException { try { // Remove file from S3 s3Service.deleteObject(getBucketName(), key); - // Remove file from local assetstore - dsBitStoreService.remove(bitstream); + if (syncEnabled) { + // Remove file from local assetstore + dsBitStoreService.remove(bitstream); + } } catch (AmazonClientException e) { log.error("remove(" + key + ")", e); throw new IOException(e); diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 0064162af6ad..86cd4ea11448 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -238,3 +238,7 @@ shibboleth.name.conversion.outputEncoding = UTF-8 ### File preview ### # File preview is enabled by default file.preview.enabled = 
false + +### Storage service ### +# Synchronization is NOT enabled by default +sync.storage.service.enabled = true From df6f5b11770552e62788beacb242af2345d9b3df Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Tue, 19 Dec 2023 15:07:08 +0100 Subject: [PATCH 04/12] Return store number in the BitstreamRest object --- .../dspace/app/rest/converter/BitstreamConverter.java | 1 + .../java/org/dspace/app/rest/model/BitstreamRest.java | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java index bb5544b3592c..e5d967afc8a4 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java @@ -45,6 +45,7 @@ public BitstreamRest convert(org.dspace.content.Bitstream obj, Projection projec checksum.setValue(obj.getChecksum()); b.setCheckSum(checksum); b.setSizeBytes(obj.getSizeBytes()); + b.setStoreNumber(obj.getStoreNumber()); return b; } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java index 8e9efc2680b7..232d96b044a0 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java @@ -46,6 +46,8 @@ public class BitstreamRest extends DSpaceObjectRest { @JsonProperty(access = Access.READ_ONLY) private Integer sequenceId; + private int storeNumber; + public String getBundleName() { return bundleName; } @@ -78,6 +80,14 @@ public void setSequenceId(Integer sequenceId) { this.sequenceId = sequenceId; } + public int getStoreNumber() { + return storeNumber; + } + + public void setStoreNumber(int storeNumber) { + this.storeNumber = 
storeNumber; + } + @Override public String getCategory() { return CATEGORY; From c1e11765febbb38b61d0da59d1bc0954dcc2d73e Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Tue, 19 Dec 2023 15:07:59 +0100 Subject: [PATCH 05/12] Find out which store is used - it could be synchronized stores number. --- .../bitstore/BitstreamStorageServiceImpl.java | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index f86296383dba..0b5423dfe5fc 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -14,6 +14,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.UUID; import javax.annotation.Nullable; @@ -66,6 +67,7 @@ public class BitstreamStorageServiceImpl implements BitstreamStorageService, Ini */ private static final Logger log = LogManager.getLogger(); private static final int SYNCHRONIZED_STORES_NUMBER = 77; + private boolean syncEnabled = false; @Autowired(required = true) protected BitstreamService bitstreamService; @@ -100,6 +102,7 @@ public void afterPropertiesSet() throws Exception { storeEntry.getValue().init(); } } + this.syncEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); } @Override @@ -111,13 +114,11 @@ public UUID store(Context context, Bitstream bitstream, InputStream is) throws S * other method of working out where to put a new bitstream, here's * where it should go */ - boolean isEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); - if (isEnabled) { + if (syncEnabled) { bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); } else { bitstream.setStoreNumber(incoming); } - 
bitstream.setStoreNumber(incoming); bitstream.setDeleted(true); bitstream.setInternalId(id); @@ -173,7 +174,11 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, // Create a deleted bitstream row, using a separate DB connection bitstream.setDeleted(true); bitstream.setInternalId(sInternalId); - bitstream.setStoreNumber(assetstore); + if (syncEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(assetstore); + } bitstreamService.update(context, bitstream); Map wantedMetadata = new HashMap(); @@ -216,7 +221,8 @@ public Map computeChecksum(Context context, Bitstream bitstream) throws IOExcept wantedMetadata.put("checksum", null); wantedMetadata.put("checksum_algorithm", null); - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); + int storeNumber = this.decideStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); return receivedMetadata; } @@ -228,7 +234,7 @@ public boolean isRegisteredBitstream(String internalId) { @Override public InputStream retrieve(Context context, Bitstream bitstream) throws SQLException, IOException { - Integer storeNumber = bitstream.getStoreNumber(); + int storeNumber = this.decideStoreNumber(bitstream); return this.getStore(storeNumber).get(bitstream); } @@ -246,7 +252,9 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio Map wantedMetadata = new HashMap(); wantedMetadata.put("size_bytes", null); wantedMetadata.put("modified", null); - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); + + int storeNumber = this.decideStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); // Make sure entries which do not exist are removed @@ -296,7 +304,7 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio // Since 
versioning allows for multiple bitstreams, check if the internal identifier isn't used on // another place if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(bitstream.getStoreNumber()).remove(bitstream); + this.getStore(storeNumber).remove(bitstream); String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); if (log.isDebugEnabled()) { @@ -345,7 +353,8 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio public Long getLastModified(Bitstream bitstream) throws IOException { Map attrs = new HashMap(); attrs.put("modified", null); - attrs = this.getStore(bitstream.getStoreNumber()).about(bitstream, attrs); + int storeNumber = this.decideStoreNumber(bitstream); + attrs = this.getStore(storeNumber).about(bitstream, attrs); if (attrs == null || !attrs.containsKey("modified")) { return null; } @@ -502,4 +511,12 @@ protected BitStoreService getStore(int position) throws IOException { return bitStoreService; } + public int decideStoreNumber(Bitstream bitstream) { + if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { + return incoming; + } else { + return bitstream.getStoreNumber(); + } + } + } From 230b40f754d33bdcbcc978463ba442077126ad5b Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Tue, 19 Dec 2023 16:03:18 +0100 Subject: [PATCH 06/12] Constant is moved to the Bitstream class --- dspace-api/src/main/java/org/dspace/content/Bitstream.java | 2 ++ .../dspace/storage/bitstore/BitstreamStorageServiceImpl.java | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/Bitstream.java b/dspace-api/src/main/java/org/dspace/content/Bitstream.java index f7aaf224dcde..2a94b68a0c5f 100644 --- a/dspace-api/src/main/java/org/dspace/content/Bitstream.java +++ b/dspace-api/src/main/java/org/dspace/content/Bitstream.java @@ -40,6 +40,8 @@ @Entity @Table(name = "bitstream") public class 
Bitstream extends DSpaceObject implements DSpaceObjectLegacySupport { + public static final int SYNCHRONIZED_STORES_NUMBER = 77; + @Column(name = "bitstream_id", insertable = false, updatable = false) private Integer legacyId; diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 0b5423dfe5fc..3d9459ddfe10 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -7,6 +7,8 @@ */ package org.dspace.storage.bitstore; +import static org.dspace.content.Bitstream.SYNCHRONIZED_STORES_NUMBER; + import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -66,7 +68,6 @@ public class BitstreamStorageServiceImpl implements BitstreamStorageService, Ini * log4j log */ private static final Logger log = LogManager.getLogger(); - private static final int SYNCHRONIZED_STORES_NUMBER = 77; private boolean syncEnabled = false; @Autowired(required = true) From bef2c7d06ce8d09bb62e976f5230a08172281da2 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Tue, 19 Dec 2023 16:16:09 +0100 Subject: [PATCH 07/12] Synchronization of storages is not allowed by default - set it up in the test environment. 
--- dspace-api/src/test/data/dspaceFolder/config/local.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index de6caf8880f7..25de026c94ac 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -277,3 +277,6 @@ handle.canonical.prefix = ${dspace.ui.url}/handle/ ### File preview ### # File preview is enabled by default file.preview.enabled = true + +### Storage service ### +sync.storage.service.enabled = false \ No newline at end of file From 4797907c5f0a88d0c649a6f5013b4865037f9123 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 20 Dec 2023 07:44:56 +0100 Subject: [PATCH 08/12] Added docs --- .../storage/bitstore/BitstreamStorageServiceImpl.java | 8 ++++++++ dspace-api/src/test/data/dspaceFolder/config/local.cfg | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 3d9459ddfe10..7b58115bcbe7 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -512,6 +512,14 @@ protected BitStoreService getStore(int position) throws IOException { return bitStoreService; } + /** + * Decide which store number should be used for the given bitstream. + * If the bitstream is synchronized (stored in to S3 and local), then the static store number is used. + * Otherwise, the bitstream's store number is used. 
+ * + * @param bitstream bitstream + * @return store number + */ public int decideStoreNumber(Bitstream bitstream) { if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { return incoming; diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 25de026c94ac..ce3a6ccce07d 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -279,4 +279,4 @@ handle.canonical.prefix = ${dspace.ui.url}/handle/ file.preview.enabled = true ### Storage service ### -sync.storage.service.enabled = false \ No newline at end of file +sync.storage.service.enabled = false From 3d4cb07447ecf410292aedce416df8795343e8c7 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 20 Dec 2023 12:52:43 +0100 Subject: [PATCH 09/12] Removed constant from the Bitstream class - it wasn't consistent --- dspace-api/src/main/java/org/dspace/content/Bitstream.java | 1 - 1 file changed, 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/Bitstream.java b/dspace-api/src/main/java/org/dspace/content/Bitstream.java index 2a94b68a0c5f..4e71503b08f7 100644 --- a/dspace-api/src/main/java/org/dspace/content/Bitstream.java +++ b/dspace-api/src/main/java/org/dspace/content/Bitstream.java @@ -40,7 +40,6 @@ @Entity @Table(name = "bitstream") public class Bitstream extends DSpaceObject implements DSpaceObjectLegacySupport { - public static final int SYNCHRONIZED_STORES_NUMBER = 77; @Column(name = "bitstream_id", insertable = false, updatable = false) private Integer legacyId; From 95fe6fb47bc8b96e3b086c994f04089f8664edd2 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 20 Dec 2023 12:53:20 +0100 Subject: [PATCH 10/12] Overridden BitstreamStorageServiceImpl by custom SyncBitstreamStorageServiceImpl --- .../bitstore/BitstreamStorageServiceImpl.java | 51 +-- .../SyncBitstreamStorageServiceImpl.java | 317 ++++++++++++++++++ 
dspace/config/spring/api/bitstore.xml | 2 +- 3 files changed, 325 insertions(+), 45 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index 7b58115bcbe7..b8a1a2e96ad4 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -7,8 +7,6 @@ */ package org.dspace.storage.bitstore; -import static org.dspace.content.Bitstream.SYNCHRONIZED_STORES_NUMBER; - import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; @@ -16,7 +14,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.UUID; import javax.annotation.Nullable; @@ -31,7 +28,6 @@ import org.dspace.content.service.BitstreamService; import org.dspace.core.Context; import org.dspace.core.Utils; -import org.dspace.services.ConfigurationService; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; @@ -68,14 +64,11 @@ public class BitstreamStorageServiceImpl implements BitstreamStorageService, Ini * log4j log */ private static final Logger log = LogManager.getLogger(); - private boolean syncEnabled = false; @Autowired(required = true) protected BitstreamService bitstreamService; @Autowired(required = true) protected ChecksumHistoryService checksumHistoryService; - @Autowired(required = true) - protected ConfigurationService configurationService; /** * asset stores @@ -103,7 +96,6 @@ public void afterPropertiesSet() throws Exception { storeEntry.getValue().init(); } } - this.syncEnabled = 
configurationService.getBooleanProperty("sync.storage.service.enabled", false); } @Override @@ -115,15 +107,10 @@ public UUID store(Context context, Bitstream bitstream, InputStream is) throws S * other method of working out where to put a new bitstream, here's * where it should go */ - if (syncEnabled) { - bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); - } else { - bitstream.setStoreNumber(incoming); - } + bitstream.setStoreNumber(incoming); bitstream.setDeleted(true); bitstream.setInternalId(id); - BitStoreService store = this.getStore(incoming); //For efficiencies sake, PUT is responsible for setting bitstream size_bytes, checksum, and checksum_algorithm store.put(bitstream, is); @@ -175,11 +162,7 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, // Create a deleted bitstream row, using a separate DB connection bitstream.setDeleted(true); bitstream.setInternalId(sInternalId); - if (syncEnabled) { - bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); - } else { - bitstream.setStoreNumber(assetstore); - } + bitstream.setStoreNumber(assetstore); bitstreamService.update(context, bitstream); Map wantedMetadata = new HashMap(); @@ -222,8 +205,7 @@ public Map computeChecksum(Context context, Bitstream bitstream) throws IOExcept wantedMetadata.put("checksum", null); wantedMetadata.put("checksum_algorithm", null); - int storeNumber = this.decideStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); return receivedMetadata; } @@ -235,7 +217,7 @@ public boolean isRegisteredBitstream(String internalId) { @Override public InputStream retrieve(Context context, Bitstream bitstream) throws SQLException, IOException { - int storeNumber = this.decideStoreNumber(bitstream); + Integer storeNumber = bitstream.getStoreNumber(); return this.getStore(storeNumber).get(bitstream); } @@ -253,9 
+235,7 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio Map wantedMetadata = new HashMap(); wantedMetadata.put("size_bytes", null); wantedMetadata.put("modified", null); - - int storeNumber = this.decideStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); // Make sure entries which do not exist are removed @@ -305,7 +285,7 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on // another place if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(storeNumber).remove(bitstream); + this.getStore(bitstream.getStoreNumber()).remove(bitstream); String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); if (log.isDebugEnabled()) { @@ -354,8 +334,7 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio public Long getLastModified(Bitstream bitstream) throws IOException { Map attrs = new HashMap(); attrs.put("modified", null); - int storeNumber = this.decideStoreNumber(bitstream); - attrs = this.getStore(storeNumber).about(bitstream, attrs); + attrs = this.getStore(bitstream.getStoreNumber()).about(bitstream, attrs); if (attrs == null || !attrs.containsKey("modified")) { return null; } @@ -512,20 +491,4 @@ protected BitStoreService getStore(int position) throws IOException { return bitStoreService; } - /** - * Decide which store number should be used for the given bitstream. - * If the bitstream is synchronized (stored in to S3 and local), then the static store number is used. - * Otherwise, the bitstream's store number is used. 
- * - * @param bitstream bitstream - * @return store number - */ - public int decideStoreNumber(Bitstream bitstream) { - if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { - return incoming; - } else { - return bitstream.getStoreNumber(); - } - } - } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java new file mode 100644 index 000000000000..1120207aecb5 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java @@ -0,0 +1,317 @@ +package org.dspace.storage.bitstore; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nullable; + +import org.apache.commons.collections4.MapUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Bitstream; +import org.dspace.core.Context; +import org.dspace.core.Utils; +import org.dspace.services.ConfigurationService; +import org.springframework.beans.factory.annotation.Autowired; + +public class SyncBitstreamStorageServiceImpl extends BitstreamStorageServiceImpl { + + /** + * log4j log + */ + private static final Logger log = LogManager.getLogger(); + private boolean syncEnabled = false; + + private static final int SYNCHRONIZED_STORES_NUMBER = 77; + + @Autowired + ConfigurationService configurationService; + + public SyncBitstreamStorageServiceImpl() { + super(); + } + + @Override + public void afterPropertiesSet() throws Exception { + for (Map.Entry storeEntry : getStores().entrySet()) { + if (storeEntry.getValue().isEnabled() && !storeEntry.getValue().isInitialized()) { + storeEntry.getValue().init(); + } + } + 
+ this.syncEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); + } + + @Override + public UUID store(Context context, Bitstream bitstream, InputStream is) throws SQLException, IOException { + // Create internal ID + String id = Utils.generateKey(); + /* + * Set the store number of the new bitstream. If you want to use some + * other method of working out where to put a new bitstream, here's + * where it should go + */ + if (syncEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(getIncoming()); + } + bitstream.setDeleted(true); + bitstream.setInternalId(id); + + + BitStoreService store = this.getStore(getIncoming()); + //For efficiency's sake, PUT is responsible for setting bitstream size_bytes, checksum, and checksum_algorithm + store.put(bitstream, is); + //bitstream.setSizeBytes(file.length()); + //bitstream.setChecksum(Utils.toHex(dis.getMessageDigest().digest())); + //bitstream.setChecksumAlgorithm("MD5"); + + bitstream.setDeleted(false); + try { + //Update our bitstream but turn off the authorization system since permissions haven't been set at this + // point in time. + context.turnOffAuthorisationSystem(); + bitstreamService.update(context, bitstream); + } catch (AuthorizeException e) { + log.error(e); + //Can never happen since we turn off authorization before we update + } finally { + context.restoreAuthSystemState(); + } + + UUID bitstreamId = bitstream.getID(); + + if (log.isDebugEnabled()) { + log.debug("Stored bitstreamID " + bitstreamId); + } + + return bitstreamId; + } + + /** + * Register a bitstream already in storage. + * + * @param context The current context + * @param assetstore The assetstore number for the bitstream to be + * registered + * @param bitstreamPath The relative path of the bitstream to be registered. + * The path is relative to the path of the assetstore.
+ * @return The ID of the registered bitstream + * @throws SQLException If a problem occurs accessing the RDBMS + * @throws IOException if IO error + */ + @Override + public UUID register(Context context, Bitstream bitstream, int assetstore, + String bitstreamPath) throws SQLException, IOException, AuthorizeException { + + // mark this bitstream as a registered bitstream + String sInternalId = REGISTERED_FLAG + bitstreamPath; + + // Create a deleted bitstream row, using a separate DB connection + bitstream.setDeleted(true); + bitstream.setInternalId(sInternalId); + if (syncEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(assetstore); + } + bitstreamService.update(context, bitstream); + + Map wantedMetadata = new HashMap(); + wantedMetadata.put("size_bytes", null); + wantedMetadata.put("checksum", null); + wantedMetadata.put("checksum_algorithm", null); + + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); + if (MapUtils.isEmpty(receivedMetadata)) { + String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; + log.error(message); + throw new IOException(message); + } else { + if (receivedMetadata.containsKey("checksum_algorithm")) { + bitstream.setChecksumAlgorithm(receivedMetadata.get("checksum_algorithm").toString()); + } + + if (receivedMetadata.containsKey("checksum")) { + bitstream.setChecksum(receivedMetadata.get("checksum").toString()); + } + + if (receivedMetadata.containsKey("size_bytes")) { + bitstream.setSizeBytes(Long.valueOf(receivedMetadata.get("size_bytes").toString())); + } + } + + bitstream.setDeleted(false); + bitstreamService.update(context, bitstream); + + UUID bitstreamId = bitstream.getID(); + if (log.isDebugEnabled()) { + log.debug("Registered bitstream " + bitstreamId + " at location " + bitstreamPath); + } + return bitstreamId; + } + + @Override + public Map computeChecksum(Context context, Bitstream 
bitstream) throws IOException { + Map wantedMetadata = new HashMap(); + wantedMetadata.put("checksum", null); + wantedMetadata.put("checksum_algorithm", null); + + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + return receivedMetadata; + } + + @Override + public InputStream retrieve(Context context, Bitstream bitstream) + throws SQLException, IOException { + int storeNumber = this.whichStoreNumber(bitstream); + return this.getStore(storeNumber).get(bitstream); + } + + @Override + public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { + Context context = new Context(Context.Mode.BATCH_EDIT); + int commitCounter = 0; + + try { + context.turnOffAuthorisationSystem(); + + List storage = bitstreamService.findDeletedBitstreams(context); + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + Map wantedMetadata = new HashMap(); + wantedMetadata.put("size_bytes", null); + wantedMetadata.put("modified", null); + + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + + // There is a small chance that this is a file which is + // being stored -- get it next time.
+ if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } + + if (deleteDbRecords) { + log.debug("deleting db record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams + } + + + // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(storeNumber).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } + } + + // Make sure to commit our outstanding work every 100 + // iterations. Otherwise you risk losing the entire transaction + // if we hit an exception, which isn't useful at all for large + // amounts of bitstreams. + commitCounter++; + if (commitCounter % 100 == 0) { + context.dispatchEvents(); + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); + } + + context.uncacheEntity(bitstream); + } + + System.out.print("Committing changes to the database..."); + context.complete(); + System.out.println(" Done!"); + } catch (SQLException | IOException sqle) { + // Aborting will leave the DB objects around, even if the + // bitstreams are deleted. 
This is OK; deleting them next + // time around will be a no-op. + if (verbose) { + System.err.println("Error: " + sqle.getMessage()); + } + context.abort(); + throw sqle; + } finally { + context.restoreAuthSystemState(); + } + } + + @Nullable + @Override + public Long getLastModified(Bitstream bitstream) throws IOException { + Map attrs = new HashMap(); + attrs.put("modified", null); + int storeNumber = this.whichStoreNumber(bitstream); + attrs = this.getStore(storeNumber).about(bitstream, attrs); + if (attrs == null || !attrs.containsKey("modified")) { + return null; + } + return Long.valueOf(attrs.get("modified").toString()); + } + + /** + * Decide which store number should be used for the given bitstream. + * If the bitstream is synchronized (stored in both S3 and local), then the incoming store number is used. + * Otherwise, the bitstream's store number is used. + * + * @param bitstream bitstream + * @return store number + */ + public int whichStoreNumber(Bitstream bitstream) { + if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { + return getIncoming(); + } else { + return bitstream.getStoreNumber(); + } + } + +} diff --git a/dspace/config/spring/api/bitstore.xml b/dspace/config/spring/api/bitstore.xml index 255bfbf75da6..0dab64f4c75e 100644 --- a/dspace/config/spring/api/bitstore.xml +++ b/dspace/config/spring/api/bitstore.xml @@ -3,7 +3,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd" default-lazy-init="true"> - + From bbd5bd87a1953d18d38327ba013c92d9e1fad851 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 20 Dec 2023 12:54:22 +0100 Subject: [PATCH 11/12] Removed ClarinS3BitStoreService.java to SyncS3BitStoreService --- ...rinS3BitStoreService.java => SyncS3BitStoreService.java} | 6 +++--- dspace/config/spring/api/bitstore.xml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename
dspace-api/src/main/java/org/dspace/storage/bitstore/{ClarinS3BitStoreService.java => SyncS3BitStoreService.java} (94%) diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java similarity index 94% rename from dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java rename to dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java index 85bbc72a1ea5..cae46a512a56 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/ClarinS3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java @@ -25,12 +25,12 @@ * * @author Milan Majchrak (milan.majchrak at dataquest.sk) */ -public class ClarinS3BitStoreService extends S3BitStoreService { +public class SyncS3BitStoreService extends S3BitStoreService { /** * log4j log */ - private static final Logger log = LogManager.getLogger(ClarinS3BitStoreService.class); + private static final Logger log = LogManager.getLogger(SyncS3BitStoreService.class); private boolean syncEnabled = false; @Autowired(required = true) @@ -39,7 +39,7 @@ public class ClarinS3BitStoreService extends S3BitStoreService { @Autowired(required = true) ConfigurationService configurationService; - public ClarinS3BitStoreService() { + public SyncS3BitStoreService() { super(); } diff --git a/dspace/config/spring/api/bitstore.xml b/dspace/config/spring/api/bitstore.xml index 0dab64f4c75e..f02edcbc0807 100644 --- a/dspace/config/spring/api/bitstore.xml +++ b/dspace/config/spring/api/bitstore.xml @@ -17,7 +17,7 @@ - + From 17cc19cec067a2b0895e0388d390031763d29558 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 20 Dec 2023 12:59:08 +0100 Subject: [PATCH 12/12] Added doc and refactoring. 
--- .../src/main/java/org/dspace/content/Bitstream.java | 1 - .../bitstore/SyncBitstreamStorageServiceImpl.java | 13 +++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/Bitstream.java b/dspace-api/src/main/java/org/dspace/content/Bitstream.java index 4e71503b08f7..f7aaf224dcde 100644 --- a/dspace-api/src/main/java/org/dspace/content/Bitstream.java +++ b/dspace-api/src/main/java/org/dspace/content/Bitstream.java @@ -40,7 +40,6 @@ @Entity @Table(name = "bitstream") public class Bitstream extends DSpaceObject implements DSpaceObjectLegacySupport { - @Column(name = "bitstream_id", insertable = false, updatable = false) private Integer legacyId; diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java index 1120207aecb5..e48487955209 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java @@ -1,3 +1,10 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ package org.dspace.storage.bitstore; import java.io.IOException; @@ -20,6 +27,12 @@ import org.dspace.services.ConfigurationService; import org.springframework.beans.factory.annotation.Autowired; +/** + * This class is a customization of the BitstreamStorageServiceImpl class. + * The bitstream is synchronized if it is stored in both S3 and the local assetstore. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ public class SyncBitstreamStorageServiceImpl extends BitstreamStorageServiceImpl { /**