Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ufal/be-s3-checksum-fix #487

Merged
merged 9 commits into from
Jan 3, 2024
Merged
24 changes: 24 additions & 0 deletions dspace-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,24 @@
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>io.findify</groupId>
<artifactId>s3mock_2.13</artifactId>
<version>0.2.6</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>com.amazonawsl</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</exclusion>
<exclusion>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
</exclusion>
</exclusions>
</dependency>

</dependencies>

<dependencyManagement>
Expand Down Expand Up @@ -907,6 +925,12 @@
<artifactId>swagger-core</artifactId>
<version>1.6.2</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
</dependencyManagement>

Expand Down
49 changes: 43 additions & 6 deletions dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
*/
package org.dspace.checker;

import static org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl.SYNCHRONIZED_STORES_NUMBER;

import java.io.IOException;
import java.sql.SQLException;
import java.util.Date;
import java.util.Map;
import java.util.Objects;

import org.apache.commons.collections4.MapUtils;
import org.apache.logging.log4j.Logger;
Expand All @@ -20,8 +23,8 @@
import org.dspace.checker.service.MostRecentChecksumService;
import org.dspace.content.Bitstream;
import org.dspace.core.Context;
import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl;
import org.dspace.storage.bitstore.factory.StorageServiceFactory;
import org.dspace.storage.bitstore.service.BitstreamStorageService;

/**
* <p>
Expand Down Expand Up @@ -55,7 +58,7 @@ public final class CheckerCommand {
* Checksum history Data access object
*/
private ChecksumHistoryService checksumHistoryService = null;
private BitstreamStorageService bitstreamStorageService = null;
private SyncBitstreamStorageServiceImpl bitstreamStorageService = null;
private ChecksumResultService checksumResultService = null;

/**
Expand Down Expand Up @@ -86,7 +89,7 @@ public final class CheckerCommand {
public CheckerCommand(Context context) {
checksumService = CheckerServiceFactory.getInstance().getMostRecentChecksumService();
checksumHistoryService = CheckerServiceFactory.getInstance().getChecksumHistoryService();
bitstreamStorageService = StorageServiceFactory.getInstance().getBitstreamStorageService();
bitstreamStorageService = StorageServiceFactory.getInstance().getSyncBitstreamStorageService();
checksumResultService = CheckerServiceFactory.getInstance().getChecksumResultService();
this.context = context;
}
Expand Down Expand Up @@ -245,7 +248,9 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException {
info.setProcessStartDate(new Date());

try {
Map checksumMap = bitstreamStorageService.computeChecksum(context, info.getBitstream());
// 1. DB - Store not match
Bitstream bitstream = info.getBitstream();
Map<String, Object> checksumMap = bitstreamStorageService.computeChecksum(context, bitstream);
if (MapUtils.isNotEmpty(checksumMap)) {
info.setBitstreamFound(true);
if (checksumMap.containsKey("checksum")) {
Expand All @@ -255,10 +260,42 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException {
if (checksumMap.containsKey("checksum_algorithm")) {
info.setChecksumAlgorithm(checksumMap.get("checksum_algorithm").toString());
}

// compare new checksum to previous checksum
info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum()));

} else {
info.setCurrentChecksum("");
info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND));
info.setToBeProcessed(false);
}

// 2. Store1 - Synchronized store 2 not match
// Check checksum of synchronized store
if (bitstream.getStoreNumber() != SYNCHRONIZED_STORES_NUMBER) {
return;
}
if (Objects.equals(ChecksumResultCode.CHECKSUM_NO_MATCH, info.getChecksumResult().getResultCode())) {
return;
}

Map<String, Object> syncStoreChecksumMap =
bitstreamStorageService.computeChecksumSpecStore(context, bitstream,
bitstreamStorageService.getSynchronizedStoreNumber(bitstream));
if (MapUtils.isNotEmpty(syncStoreChecksumMap)) {
String syncStoreChecksum = "";
if (checksumMap.containsKey("checksum")) {
syncStoreChecksum = syncStoreChecksumMap.get("checksum").toString();
}
// compare new checksum to previous checksum
ChecksumResult checksumResult = compareChecksums(info.getCurrentChecksum(), syncStoreChecksum);
// Do not override result with synchronization info if the checksums are not matching between
// DB and store
if (!Objects.equals(checksumResult.getResultCode(), ChecksumResultCode.CHECKSUM_NO_MATCH)) {
info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.CHECKSUM_SYNC_NO_MATCH));
}
}

// compare new checksum to previous checksum
info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum()));
} catch (IOException e) {
// bitstream located, but file missing from asset store
info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ public void addHistory(Context context, MostRecentChecksum mostRecentChecksum) t
if (mostRecentChecksum.getBitstream().isDeleted()) {
checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.BITSTREAM_MARKED_DELETED);
} else {
checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.CHECKSUM_MATCH);
checksumResult = checksumResultService.findByCode(context,
mostRecentChecksum.getChecksumResult().getResultCode());
}

checksumHistory.setResult(checksumResult);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ public enum ChecksumResultCode {
CHECKSUM_MATCH,
CHECKSUM_NO_MATCH,
CHECKSUM_PREV_NOT_FOUND,
CHECKSUM_ALGORITHM_INVALID
CHECKSUM_ALGORITHM_INVALID,
CHECKSUM_SYNC_NO_MATCH
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ public int getBitstreamNotFoundReport(Context context, Date startDate, Date endD

osw.write("\n");
osw.write(msg("bitstream-not-found-report"));
osw.write(" ");
osw.write(applyDateFormatShort(startDate));
osw.write(" ");
osw.write(msg("date-range-to"));
Expand Down Expand Up @@ -230,6 +231,7 @@ public int getUncheckedBitstreamsReport(Context context, OutputStreamWriter osw)

osw.write("\n");
osw.write(msg("unchecked-bitstream-report"));
osw.write(" ");
osw.write(applyDateFormatShort(new Date()));
osw.write("\n\n\n");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ public List<MostRecentChecksum> findByResultTypeInDateRange(Context context, Dat
criteriaQuery.where(criteriaBuilder.and(
criteriaBuilder.equal(mostRecentResult.get(ChecksumResult_.resultCode), resultCode),
criteriaBuilder.lessThanOrEqualTo(
mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate),
criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate)
mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate),
criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate)
)
);
List<Order> orderList = new LinkedList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,8 @@ public void updateLastModified(Context context, Bitstream bitstream) {
}

@Override
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException {
return bitstreamDAO.findDeletedBitstreams(context);
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException {
return bitstreamDAO.findDeletedBitstreams(context, limit, offset);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import java.io.IOException;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

Expand All @@ -25,7 +25,7 @@
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.event.Event;
import org.dspace.storage.bitstore.DSBitStoreService;
import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl;
import org.dspace.storage.bitstore.service.BitstreamStorageService;
import org.springframework.beans.factory.annotation.Autowired;

Expand All @@ -48,7 +48,7 @@ public class ClarinBitstreamServiceImpl implements ClarinBitstreamService {
private static final String CSA = "MD5";

@Autowired
private DSBitStoreService storeService;
private SyncBitstreamStorageServiceImpl syncBitstreamStorageService;
@Autowired
protected BitstreamDAO bitstreamDAO;
@Autowired
Expand Down Expand Up @@ -99,13 +99,12 @@ public boolean validation(Context context, Bitstream bitstream)
}
//get file from assetstore based on internal_id
//recalculate check fields
Map wantedMetadata = new HashMap();
wantedMetadata.put("size_bytes", null);
wantedMetadata.put("checksum", null);
wantedMetadata.put("checksum_algorithm", null);
Map checksumMap = storeService.about(bitstream, wantedMetadata);
List<String> wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm");
Map<String, Object> receivedMetadata = syncBitstreamStorageService
.getStore(syncBitstreamStorageService.whichStoreNumber(bitstream))
.about(bitstream, wantedMetadata);
//check that new calculated values match the expected values
if (MapUtils.isEmpty(checksumMap) || !valid(bitstream, checksumMap)) {
if (MapUtils.isEmpty(receivedMetadata) || !valid(bitstream, receivedMetadata)) {
//an error occurred - expected and calculated values do not match
//delete all created data
bitstreamService.delete(context, bitstream);
Expand All @@ -126,7 +125,7 @@ public boolean validation(Context context, Bitstream bitstream)
* @param checksumMap calculated values
* @return bitstream values match with expected values
*/
private boolean valid(Bitstream bitstream, Map checksumMap) {
private boolean valid(Bitstream bitstream, Map<String, Object> checksumMap) {
if (!checksumMap.containsKey("checksum") || !checksumMap.containsKey("checksum_algorithm") ||
!checksumMap.containsKey("size_bytes")) {
log.error("Cannot validate of bitstream with id: " + bitstream.getID() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public interface BitstreamDAO extends DSpaceObjectLegacySupportDAO<Bitstream> {

public Iterator<Bitstream> findAll(Context context, int limit, int offset) throws SQLException;

public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException;
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException;

public List<Bitstream> findDuplicateInternalIdentifier(Context context, Bitstream bitstream) throws SQLException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ protected BitstreamDAOImpl() {
}

@Override
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException {
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException {
CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context);
CriteriaQuery criteriaQuery = getCriteriaQuery(criteriaBuilder, Bitstream.class);
Root<Bitstream> bitstreamRoot = criteriaQuery.from(Bitstream.class);
criteriaQuery.select(bitstreamRoot);
criteriaQuery.orderBy(criteriaBuilder.desc(bitstreamRoot.get(Bitstream_.ID)));
criteriaQuery.where(criteriaBuilder.equal(bitstreamRoot.get(Bitstream_.deleted), true));
return list(context, criteriaQuery, false, Bitstream.class, -1, -1);
return list(context, criteriaQuery, false, Bitstream.class, limit, offset);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public InputStream retrieve(Context context, Bitstream bitstream)
* @return a list of all bitstreams that have been "deleted"
* @throws SQLException if database error
*/
public List<Bitstream> findDeletedBitstreams(Context context) throws SQLException;
public List<Bitstream> findDeletedBitstreams(Context context, int limit, int offset) throws SQLException;


/**
Expand Down
6 changes: 4 additions & 2 deletions dspace-api/src/main/java/org/dspace/core/Email.java
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,8 @@ public void send() throws MessagingException, IOException {
message.addRecipient(Message.RecipientType.TO, new InternetAddress(
i.next()));
}
// Get headers defined by the template.
String[] templateHeaders = config.getArrayProperty("mail.message.headers");

// Format the mail message body
VelocityEngine templateEngine = new VelocityEngine();
Expand All @@ -334,6 +336,7 @@ public void send() throws MessagingException, IOException {
repo.putStringResource(contentName, content);
// Turn content into a template.
template = templateEngine.getTemplate(contentName);
templateHeaders = new String[] {};
}

StringWriter writer = new StringWriter();
Expand All @@ -351,8 +354,7 @@ public void send() throws MessagingException, IOException {
message.setSentDate(date);
message.setFrom(new InternetAddress(from));

// Get headers defined by the template.
for (String headerName : config.getArrayProperty("mail.message.headers")) {
for (String headerName : templateHeaders) {
String headerValue = (String) vctx.get(headerName);
if ("subject".equalsIgnoreCase(headerName)) {
if (null != headerValue) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -153,22 +155,24 @@ protected boolean isLonger(String internalId, int endIndex) {
* Retrieves a map of useful metadata about the File (size, checksum, modified)
*
* @param file The File to analyze
* @param attrs The map where we are storing values
* @param attrs The list of requested metadata values
* @return Map of updated metadatas / attrs
* @throws IOException
*/
public Map about(File file, Map attrs) throws IOException {
public Map<String, Object> about(File file, List<String> attrs) throws IOException {

Map<String, Object> metadata = new HashMap<String, Object>();

try {
if (file != null && file.exists()) {
this.putValueIfExistsKey(attrs, SIZE_BYTES, file.length());
if (attrs.containsKey(CHECKSUM)) {
attrs.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file)));
attrs.put(CHECKSUM_ALGORITHM, CSA);
this.putValueIfExistsKey(attrs, metadata, SIZE_BYTES, file.length());
if (attrs.contains(CHECKSUM)) {
metadata.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file)));
metadata.put(CHECKSUM_ALGORITHM, CSA);
}
this.putValueIfExistsKey(attrs, MODIFIED, String.valueOf(file.lastModified()));
return attrs;
this.putValueIfExistsKey(attrs, metadata, MODIFIED, String.valueOf(file.lastModified()));
}
return null;
return metadata;
} catch (Exception e) {
log.error("about( FilePath: " + file.getAbsolutePath() + ", Map: " + attrs.toString() + ")", e);
throw new IOException(e);
Expand Down Expand Up @@ -204,13 +208,9 @@ private byte[] generateChecksumFrom(FileInputStream fis) throws IOException, NoS
}
}

protected void putValueIfExistsKey(Map attrs, String key, Object value) {
this.putEntryIfExistsKey(attrs, key, Map.entry(key, value));
}

protected void putEntryIfExistsKey(Map attrs, String key, Map.Entry entry) {
if (attrs.containsKey(key)) {
attrs.put(entry.getKey(), entry.getValue());
protected void putValueIfExistsKey(List<String> attrs, Map<String, Object> metadata, String key, Object value) {
if (attrs.contains(key)) {
metadata.put(key, value);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;

import org.dspace.content.Bitstream;
Expand Down Expand Up @@ -62,13 +63,13 @@ public interface BitStoreService {
* Obtain technical metadata about an asset in the asset store.
*
* @param bitstream The bitstream to describe
* @param attrs A Map whose keys consist of desired metadata fields
* @param attrs A List of desired metadata fields
* @return attrs
* A Map with key/value pairs of desired metadata
* If file not found, then return null
* @throws java.io.IOException If a problem occurs while obtaining metadata
*/
public Map about(Bitstream bitstream, Map attrs) throws IOException;
public Map<String, Object> about(Bitstream bitstream, List<String> attrs) throws IOException;

/**
* Remove an asset from the asset store.
Expand Down
Loading
Loading