From dc628c96de2c01703a47edeece1ca002bbac0a71 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 15 Nov 2023 12:29:15 +0100 Subject: [PATCH 01/19] ufal/downloading-restricted-bitstreams-not-working-properly (#457) * The admin could download restricted bitstreams. * The user cannot download bitstream if he/she is not signed in. * Cannot obtain context with user. * The user is already fetched from the context. * Added docs. * Do not use endpoint for downloading the single file because it already exists - remove it. * Fixed AuthorizationRestControllerIT tests. --- .../AuthorizationBitstreamUtils.java | 24 ++- .../app/rest/AuthorizationRestController.java | 15 +- .../app/rest/MetadataBitstreamController.java | 146 +++--------------- .../rest/AuthorizationRestControllerIT.java | 53 +++++-- .../rest/MetadataBitstreamControllerIT.java | 36 +---- 5 files changed, 85 insertions(+), 189 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/authorize/AuthorizationBitstreamUtils.java b/dspace-api/src/main/java/org/dspace/authorize/AuthorizationBitstreamUtils.java index adada5ad7c28..1b98556ecad0 100644 --- a/dspace-api/src/main/java/org/dspace/authorize/AuthorizationBitstreamUtils.java +++ b/dspace-api/src/main/java/org/dspace/authorize/AuthorizationBitstreamUtils.java @@ -18,7 +18,6 @@ import org.dspace.content.Bitstream; import org.dspace.content.Item; import org.dspace.content.clarin.ClarinLicense; -import org.dspace.content.clarin.ClarinLicenseLabel; import org.dspace.content.clarin.ClarinLicenseResourceMapping; import org.dspace.content.service.BitstreamService; import org.dspace.content.service.clarin.ClarinLicenseResourceMappingService; @@ -93,15 +92,15 @@ public boolean authorizeBitstream(Context context, Bitstream bitstream) throws S } /** - * If the bitstream has RES or ACA license and the user is Anonymous do not authorize that user. - * The user will be redirected to the login. + * Do not allow download for anonymous users. Allow it only if the bitstream has Clarin License and the license has + * confirmation = 3 (allow anonymous). + * * @param context DSpace context object * @param bitstreamID downloading Bitstream UUID * @return if the current user is authorized */ public boolean authorizeLicenseWithUser(Context context, UUID bitstreamID) throws SQLException { - // If the current user is null that means that the user is not signed in and cannot download the bitstream - // with RES or ACA license + // If the current user is null that means that the user is not signed if (Objects.nonNull(context.getCurrentUser())) { // User is signed return true; @@ -118,16 +117,13 @@ public boolean authorizeLicenseWithUser(Context context, UUID bitstreamID) throw // Bitstream should have only one type of the Clarin license, so we could get first record ClarinLicense clarinLicense = Objects.requireNonNull(clarinLicenseResourceMappings.get(0)).getLicense(); - // Get License Labels from clarin license and check if one of them is ACA or RES - List clarinLicenseLabels = clarinLicense.getLicenseLabels(); - for (ClarinLicenseLabel clarinLicenseLabel : clarinLicenseLabels) { - if (StringUtils.equals(clarinLicenseLabel.getLabel(), "RES") || - StringUtils.equals(clarinLicenseLabel.getLabel(), "ACA")) { - return false; - } + // 3 - Allow download for anonymous users, but with license confirmation + // 0 - License confirmation is not required + if (Objects.equals(clarinLicense.getConfirmation(), 3) || + Objects.equals(clarinLicense.getConfirmation(), 0)) { + return true; } - - return true; + return false; } private boolean userIsSubmitter(Context context, Bitstream bitstream, EPerson currentUser, UUID userID) { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/AuthorizationRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/AuthorizationRestController.java index e91242bae75c..5b23d5cb893a 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/AuthorizationRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/AuthorizationRestController.java @@ -27,6 +27,7 @@ import org.dspace.content.Bitstream; import org.dspace.content.service.BitstreamService; import org.dspace.content.service.clarin.ClarinLicenseResourceMappingService; +import org.dspace.core.Constants; import org.dspace.core.Context; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -84,28 +85,30 @@ public ResponseEntity authrn(@PathVariable String id, HttpServletResponse respon return null; } - // If the bitstream has RES or ACA license and the user is Anonymous return NotAuthorized exception + // Do not allow download for anonymous users. Allow it only if the bitstream has Clarin License and the license + // has confirmation = 3 (allow anonymous). if (!authorizationBitstreamUtils.authorizeLicenseWithUser(context, bitstream.getID())) { response.sendError(HttpStatus.UNAUTHORIZED.value(), - "Anonymous user cannot download bitstream with REC or ACA license"); + "Anonymous user cannot download this bitstream"); return null; } // Wrap exceptions to the AuthrnRest object. - String errorMessage = "User is not authorized to download the bitstream."; - boolean isAuthorized = false; + String errorMessage = ""; try { - isAuthorized = authorizationBitstreamUtils.authorizeBitstream(context, bitstream); + authorizeService.authorizeAction(context, bitstream, Constants.READ); } catch (AuthorizeException e) { if (e instanceof MissingLicenseAgreementException) { errorMessage = MissingLicenseAgreementException.NAME; } else if (e instanceof DownloadTokenExpiredException) { errorMessage = DownloadTokenExpiredException.NAME; + } else { + errorMessage = e.getMessage(); } } - if (!isAuthorized) { + if (StringUtils.isNotBlank(errorMessage)) { // If the user is not authorized return response with the error message response.sendError(HttpStatus.UNAUTHORIZED.value(), errorMessage); return null; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/MetadataBitstreamController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/MetadataBitstreamController.java index ac26a2c69551..5e6700306959 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/MetadataBitstreamController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/MetadataBitstreamController.java @@ -7,16 +7,18 @@ */ package org.dspace.app.rest; +import static org.dspace.app.rest.utils.RegexUtils.REGEX_REQUESTMAPPING_IDENTIFIER_AS_UUID; + import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; import java.util.List; import java.util.Objects; +import java.util.UUID; import java.util.zip.Deflater; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.compress.utils.IOUtils; @@ -24,38 +26,34 @@ import org.apache.logging.log4j.Logger; import org.dspace.app.rest.exception.DSpaceBadRequestException; import org.dspace.app.rest.exception.UnprocessableEntityException; -import org.dspace.app.rest.model.BitstreamRest; +import org.dspace.app.rest.model.ItemRest; import org.dspace.app.rest.utils.ContextUtil; +import org.dspace.authorize.AuthorizationBitstreamUtils; import org.dspace.authorize.AuthorizeException; -import org.dspace.authorize.MissingLicenseAgreementException; import org.dspace.authorize.service.AuthorizeService; import org.dspace.content.Bitstream; -import org.dspace.content.BitstreamFormat; import org.dspace.content.Bundle; import org.dspace.content.DSpaceObject; import org.dspace.content.Item; import org.dspace.content.service.BitstreamService; -import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.handle.service.HandleService; import org.dspace.services.ConfigurationService; +import org.dspace.services.RequestService; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.core.io.InputStreamResource; -import org.springframework.core.io.Resource; import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; -/** + /** * This CLARIN Controller download a single file or a ZIP file from the Item's bitstream. */ @RestController -@RequestMapping("/api/" + BitstreamRest.CATEGORY + "/" + BitstreamRest.PLURAL_NAME) +@RequestMapping("/api/" + ItemRest.CATEGORY + "/" + ItemRest.PLURAL_NAME + REGEX_REQUESTMAPPING_IDENTIFIER_AS_UUID) public class MetadataBitstreamController { private static Logger log = org.apache.logging.log4j.LogManager.getLogger(MetadataBitstreamController.class); @@ -69,90 +67,17 @@ public class MetadataBitstreamController { private AuthorizeService authorizeService; @Autowired private ConfigurationService configurationService; - - - @GetMapping("/handle/{id}/{subId}/{fileName}") - public ResponseEntity downloadSingleFile(@PathVariable("id") String id, - @PathVariable("subId") String subId, - @PathVariable("fileName") String fileName, - HttpServletRequest request, HttpServletResponse response) - throws IOException { - String handleID = id + "/" + subId; - if (StringUtils.isBlank(id) || StringUtils.isBlank(subId)) { - log.error("Handle cannot be null! PathVariable `id` or `subId` is null."); - throw new DSpaceBadRequestException("Handle cannot be null!"); - } - - Context context = ContextUtil.obtainContext(request); - if (Objects.isNull(context)) { - log.error("Cannot obtain the context from the request."); - throw new RuntimeException("Cannot obtain the context from the request."); - } - - DSpaceObject dso = null; - try { - dso = handleService.resolveToObject(context, handleID); - } catch (Exception e) { - log.error("Cannot resolve handle: " + handleID); - throw new RuntimeException("Cannot resolve handle: " + handleID); - } - - - if (Objects.isNull(dso)) { - log.error("DSO is null"); - return null; - } - - if (!(dso instanceof Item)) { - log.error("DSO is not instance of Item"); - return null; - } - - Item item = (Item) dso; - List bundles = item.getBundles(); - // Find bitstream and start downloading. - for (Bundle bundle: bundles) { - for (Bitstream bitstream: bundle.getBitstreams()) { - // Authorize the action - it will send response redirect if something gets wrong. - authorizeBitstreamAction(context, bitstream, response); - - String btName = bitstream.getName(); - if (!(btName.equalsIgnoreCase(fileName))) { - continue; - } - try { - BitstreamFormat bitstreamFormat = bitstream.getFormat(context); - // Check if the bitstream has some extensions e.g., `.txt, .jpg,..` - checkBitstreamExtensions(bitstreamFormat); - - // Get content of the bitstream - InputStream inputStream = bitstreamService.retrieve(context, bitstream); - InputStreamResource resource = new InputStreamResource(inputStream); - HttpHeaders header = new HttpHeaders(); - header.add(HttpHeaders.CONTENT_DISPOSITION, - "attachment; filename=" + fileName); - header.add("Cache-Control", "no-cache, no-store, must-revalidate"); - header.add("Pragma", "no-cache"); - header.add("Expires", "0"); - return ResponseEntity.ok() - .headers(header) - .contentLength(inputStream.available()) - .contentType(MediaType.APPLICATION_OCTET_STREAM) - .body(resource); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - } - - return null; - } + @Autowired + AuthorizationBitstreamUtils authorizationBitstreamUtils; + @Autowired + private RequestService requestService; /** * Download all Item's bitstreams as single ZIP file. */ - @GetMapping("/allzip") - public void downloadFileZip(@RequestParam("handleId") String handleId, + @PreAuthorize("hasPermission(#uuid, 'ITEM', 'READ')") + @RequestMapping( method = {RequestMethod.GET, RequestMethod.HEAD}, value = "allzip") + public void downloadFileZip(@PathVariable UUID uuid, @RequestParam("handleId") String handleId, HttpServletResponse response, HttpServletRequest request) throws IOException, SQLException, AuthorizeException { if (StringUtils.isBlank(handleId)) { @@ -195,11 +120,11 @@ public void downloadFileZip(@RequestParam("handleId") String handleId, for (Bundle original : bundles) { List bss = original.getBitstreams(); for (Bitstream bitstream : bss) { - authorizeBitstreamAction(context, bitstream, response); - String filename = bitstream.getName(); ZipArchiveEntry ze = new ZipArchiveEntry(filename); zip.putArchiveEntry(ze); + // Get content of the bitstream + // Retrieve method authorize bitstream download action. InputStream is = bitstreamService.retrieve(context, bitstream); IOUtils.copy(is, zip); zip.closeArchiveEntry(); @@ -209,37 +134,4 @@ public void downloadFileZip(@RequestParam("handleId") String handleId, zip.close(); response.getOutputStream().flush(); } - - /** - * Could the user download that bitstream? - * @param context DSpace context object - * @param bitstream Bitstream to download - * @param response for possibility to redirect - */ - private void authorizeBitstreamAction(Context context, Bitstream bitstream, HttpServletResponse response) - throws IOException { - - String uiURL = configurationService.getProperty("dspace.ui.url"); - if (StringUtils.isBlank(uiURL)) { - log.error("Configuration property `dspace.ui.url` cannot be empty or null!"); - throw new RuntimeException("Configuration property `dspace.ui.url` cannot be empty or null!"); - } - try { - authorizeService.authorizeAction(context, bitstream, Constants.READ); - } catch (MissingLicenseAgreementException e) { - response.sendRedirect(uiURL + "/bitstream/" + bitstream.getID() + "/download"); - } catch (AuthorizeException | SQLException e) { - response.sendRedirect(uiURL + "/login"); - } - } - - /** - * Check if the bitstream has file extension. - */ - private void checkBitstreamExtensions(BitstreamFormat bitstreamFormat) { - if ( Objects.isNull(bitstreamFormat) || CollectionUtils.isEmpty(bitstreamFormat.getExtensions())) { - log.error("Bitstream Extensions cannot be empty for downloading/previewing bitstreams."); - throw new RuntimeException("Bitstream Extensions cannot be empty for downloading/previewing bitstreams."); - } - } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthorizationRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthorizationRestControllerIT.java index 03a5ae2ecc11..82546b0dacff 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthorizationRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthorizationRestControllerIT.java @@ -39,6 +39,8 @@ import org.dspace.builder.ClarinUserRegistrationBuilder; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.EPersonBuilder; +import org.dspace.builder.ResourcePolicyBuilder; import org.dspace.builder.WorkspaceItemBuilder; import org.dspace.content.Bitstream; import org.dspace.content.Collection; @@ -52,6 +54,11 @@ import org.dspace.content.service.clarin.ClarinLicenseLabelService; import org.dspace.content.service.clarin.ClarinLicenseResourceMappingService; import org.dspace.content.service.clarin.ClarinLicenseService; +import org.dspace.core.Constants; +import org.dspace.eperson.EPerson; +import org.dspace.eperson.Group; +import org.dspace.eperson.factory.EPersonServiceFactory; +import org.dspace.eperson.service.GroupService; import org.hamcrest.Matchers; import org.junit.Before; import org.junit.Test; @@ -59,8 +66,8 @@ public class AuthorizationRestControllerIT extends AbstractControllerIntegrationTest { - private static final String CLARIN_LICENSE_NAME = "Test Clarin License"; - private static final String TEXT_PLAIN_UTF_8 = "text/plain;charset=UTF-8"; + public static final String CLARIN_LICENSE_NAME = "Test Clarin License"; + public static final String TEXT_PLAIN_UTF_8 = "text/plain;charset=UTF-8"; @Autowired ClarinLicenseService clarinLicenseService; @@ -72,6 +79,8 @@ public class AuthorizationRestControllerIT extends AbstractControllerIntegration Item item; WorkspaceItem witem; ClarinLicense clarinLicense; + EPerson ePerson2; + String eperson2Password = "secret"; @Before public void setup() throws Exception { @@ -89,6 +98,19 @@ public void setup() throws Exception { context.setCurrentUser(eperson); InputStream pdf = getClass().getResourceAsStream("simple-article.pdf"); + // Create another person to try downloading the bitstream as anonymous user. + ePerson2 = EPersonBuilder.createEPerson(context) + .withCanLogin(true) + .withEmail("test@dtq.com") + .withPassword("secret") + .withNameInMetadata("Test", "User") + .build(); + + // The new user is not added into anonymous group by default + GroupService groupService = EPersonServiceFactory.getInstance().getGroupService(); + Group anonymousGroup = groupService.findByName(context, Group.ANONYMOUS); + groupService.addMember(context, anonymousGroup, ePerson2); + witem = WorkspaceItemBuilder.createWorkspaceItem(context, col1) .withTitle("Test WorkspaceItem") .withIssueDate("2017-10-17") @@ -96,9 +118,16 @@ public void setup() throws Exception { .build(); item = witem.getItem(); + Bitstream bitstream = item.getBundles().get(0).getBitstreams().get(0); + + // Create resource policy to allow anonymous user download the bitstream + ResourcePolicyBuilder.createResourcePolicy(context).withUser(ePerson2) + .withAction(Constants.READ) + .withGroup(anonymousGroup) + .withDspaceObject(bitstream).build(); // Create clarin license with clarin license label - clarinLicense = createClarinLicense(CLARIN_LICENSE_NAME, "Test Def", "Test R Info", 1); + clarinLicense = createClarinLicense(CLARIN_LICENSE_NAME, "Test Def", "Test R Info", 3); context.restoreAuthSystemState(); } @@ -118,12 +147,12 @@ public void shouldAuthorizeUserAsSubmitter() throws Exception { // DownloadTokenExpiredException, 401 @Test public void shouldNotAuthorizeUserByWrongToken() throws Exception { - // Admin is not the submitter. - String authTokenAdmin = getAuthToken(admin.getEmail(), password); + // The user is not submitter. + String authTokenUser = getAuthToken(ePerson2.getEmail(), eperson2Password); // Load bitstream from the item. Bitstream bitstream = item.getBundles().get(0).getBitstreams().get(0); - getClient(authTokenAdmin).perform(get("/api/authrn/" + + getClient(authTokenUser).perform(get("/api/authrn/" + bitstream.getID().toString() + "?dtoken=wrongToken")) .andExpect(status().isUnauthorized()) .andExpect(status().reason(is(Matchers.is(DownloadTokenExpiredException.NAME)))); @@ -192,10 +221,10 @@ public void shouldNotAuthorizeByExpiredToken() throws Exception { context.restoreAuthSystemState(); Bitstream bitstream = witem.getItem().getBundles().get(0).getBitstreams().get(0); - // Admin is not the submitter - String authTokenAdmin = getAuthToken(admin.getEmail(), password); - // The admin should be authorized to download the bitstream with token - getClient(authTokenAdmin).perform(get("/api/authrn/" + + // The user is not submitter + String authTokenUser = getAuthToken(ePerson2.getEmail(), eperson2Password); + // The user should not be authorized to download the bitstream with expired token + getClient(authTokenUser).perform(get("/api/authrn/" + bitstream.getID().toString() + "?dtoken=" + token)) .andExpect(status().isUnauthorized()) .andExpect(status().reason(is(Matchers.is(DownloadTokenExpiredException.NAME)))); @@ -254,8 +283,8 @@ public void shouldNotAuthorizeWhenUserMetadataAreNotFilledIn() throws Exception Bitstream bitstream = witem.getItem().getBundles().get(0).getBitstreams().get(0); // Admin is not the submitter - String authTokenAdmin = getAuthToken(admin.getEmail(), password); - getClient(authTokenAdmin).perform(get("/api/authrn/" + bitstream.getID().toString())) + String eperson2Token = getAuthToken(ePerson2.getEmail(), eperson2Password); + getClient(eperson2Token).perform(get("/api/authrn/" + bitstream.getID().toString())) .andExpect(status().isUnauthorized()) .andExpect(status().reason(is(Matchers.is(MissingLicenseAgreementException.NAME)))); } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamControllerIT.java index d397062e0200..63f7e62206f8 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamControllerIT.java @@ -19,6 +19,7 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.io.IOUtils; +import org.dspace.app.rest.model.ItemRest; import org.dspace.app.rest.test.AbstractControllerIntegrationTest; import org.dspace.authorize.service.AuthorizeService; import org.dspace.builder.BitstreamBuilder; @@ -33,9 +34,9 @@ import org.springframework.beans.factory.annotation.Autowired; public class MetadataBitstreamControllerIT extends AbstractControllerIntegrationTest { - private static final String METADATABITSTREAM_ENDPOINT = "/api/core/bitstreams/"; - private static final String METADATABITSTREAM_DOWNLOAD_SINGLE_ENDPOINT = METADATABITSTREAM_ENDPOINT + "/handle"; - private static final String METADATABITSTREAM_DOWNLOAD_ALL_ENDPOINT = METADATABITSTREAM_ENDPOINT + "/allzip"; + private static final String METADATABITSTREAM_ENDPOINT = "/api/" + ItemRest.CATEGORY + "/" + ItemRest.PLURAL_NAME; + private static final String ALL_ZIP_PATH = "allzip"; + private static final String HANDLE_PARAM = "handleId"; private static final String AUTHOR = "Test author name"; private Collection col; @@ -75,29 +76,6 @@ public void setUp() throws Exception { context.restoreAuthSystemState(); } - @Test - public void downloadSingleFileNullPathVariable() throws Exception { - getClient().perform(get(METADATABITSTREAM_DOWNLOAD_SINGLE_ENDPOINT)).andExpect(status().is4xxClientError()); - } - - @Test - public void downloadSingleFileWithAuthorize() throws Exception { - InputStream ip = bitstreamService.retrieve(context, bts); - String token = getAuthToken(admin.getEmail(), password); - getClient(token).perform(get(METADATABITSTREAM_DOWNLOAD_SINGLE_ENDPOINT + - "/" + publicItem.getHandle() + "/" + bts.getName())) - .andExpect(status().isOk()) - .andExpect(content().contentType("application/octet-stream;charset=UTF-8")) - .andExpect(content().bytes(IOUtils.toByteArray(ip))); - } - - @Test - public void downloadSingleFileWithNoAuthorize() throws Exception { - getClient().perform(get(METADATABITSTREAM_DOWNLOAD_SINGLE_ENDPOINT + - "/" + publicItem.getHandle() + "/" + bts.getName())) - .andExpect(status().isOk()); - } - @Test public void downloadAllZip() throws Exception { ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); @@ -113,12 +91,10 @@ public void downloadAllZip() throws Exception { zip.close(); String token = getAuthToken(admin.getEmail(), password); - getClient(token).perform(get(METADATABITSTREAM_DOWNLOAD_ALL_ENDPOINT ).param("handleId", - publicItem.getHandle())) + getClient(token).perform(get(METADATABITSTREAM_ENDPOINT + "/" + publicItem.getID() + + "/" + ALL_ZIP_PATH).param(HANDLE_PARAM, publicItem.getHandle())) .andExpect(status().isOk()) .andExpect(content().bytes(byteArrayOutputStream.toByteArray())); } - - } From 2f8a81d9095093f091f82633bec0152f68323408 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 15 Nov 2023 12:55:58 +0100 Subject: [PATCH 02/19] ufal/be-cannot-download-and-preview-files-after-migration (#454) * Find bitstream format using mimetype not ID. * Updated tests according to fix. --- .../rest/ClarinBitstreamImportController.java | 7 +++-- .../ClarinBitstreamImportControllerIT.java | 26 +++++++++---------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinBitstreamImportController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinBitstreamImportController.java index 97ac1494d160..63380a756c2f 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinBitstreamImportController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinBitstreamImportController.java @@ -127,11 +127,10 @@ public BitstreamRest importBitstreamForExistingFile(HttpServletRequest request) log.info("SequenceId is null. Bitstream UUID: " + bitstream.getID()); } //add bitstream format - String bitstreamFormatIdString = request.getParameter("bitstreamFormat"); - Integer bitstreamFormatId = getIntegerFromString(bitstreamFormatIdString); + String bitstreamFormatMimeType = request.getParameter("bitstreamFormat"); BitstreamFormat bitstreamFormat = null; - if (!Objects.isNull(bitstreamFormatId)) { - bitstreamFormat = bitstreamFormatService.find(context, bitstreamFormatId); + if (StringUtils.isNotBlank(bitstreamFormatMimeType)) { + bitstreamFormat = bitstreamFormatService.findByMIMEType(context, bitstreamFormatMimeType); } bitstream.setFormat(context, bitstreamFormat); String deletedString = request.getParameter("deleted"); diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinBitstreamImportControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinBitstreamImportControllerIT.java index e16260565dc4..9e0c0f339a7c 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinBitstreamImportControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinBitstreamImportControllerIT.java @@ -95,7 +95,7 @@ public void setup() throws Exception { .withName("TESTINGBUNDLE") .build(); token = getAuthToken(admin.getEmail(), password); - bitstreamFormat = BitstreamFormatBuilder.createBitstreamFormat(context).build(); + bitstreamFormat = BitstreamFormatBuilder.createBitstreamFormat(context).withMimeType("application/pdf").build(); String input = "Hello, World!"; MockMultipartFile file = new MockMultipartFile("file", "hello.txt", MediaType.TEXT_PLAIN_VALUE, @@ -147,7 +147,7 @@ public void importBitstreamForExistingFileWithBundleTest() throws Exception { .contentType(contentType) .param("internal_id", internalId) .param("storeNumber", Integer.toString(storeNumber)) - .param("bitstreamFormat", Integer.toString(bitstreamFormat.getID())) + .param("bitstreamFormat", bitstreamFormat.getMIMEType()) .param("deleted", Boolean.toString(deleted)) .param("sequenceId", Integer.toString(sequence)) .param("primaryBundle_id", "") @@ -156,8 +156,8 @@ public void importBitstreamForExistingFileWithBundleTest() throws Exception { .andReturn().getResponse().getContentAsString(), "$.id")); - checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getID(), sequence, deleted, sizeBytes, - checkSum); + checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getMIMEType(), sequence, deleted, + sizeBytes, checkSum); //clean all context.turnOffAuthorisationSystem(); @@ -182,7 +182,7 @@ public void importBitstreamForExistingFileWithoutBundleTest() throws Exception { .contentType(contentType) .param("internal_id", internalId) .param("storeNumber", Integer.toString(storeNumber)) - .param("bitstreamFormat", Integer.toString(bitstreamFormat.getID())) + .param("bitstreamFormat", bitstreamFormat.getMIMEType()) .param("deleted", Boolean.toString(deleted)) .param("sequenceId", Integer.toString(sequence)) .param("primaryBundle_id", "") @@ -191,8 +191,8 @@ public void importBitstreamForExistingFileWithoutBundleTest() throws Exception { .andReturn().getResponse().getContentAsString(), "$.id")); - checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getID(), sequence, deleted, sizeBytes, - checkSum); + checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getMIMEType(), sequence, deleted, + sizeBytes, checkSum); //clean all context.turnOffAuthorisationSystem(); @@ -217,7 +217,7 @@ public void importBitstreamForExistingFileAsPrimaryBitstreamOfBundleTest() throw .contentType(contentType) .param("internal_id", internalId) .param("storeNumber", Integer.toString(storeNumber)) - .param("bitstreamFormat", Integer.toString(bitstreamFormat.getID())) + .param("bitstreamFormat", bitstreamFormat.getMIMEType()) .param("deleted", Boolean.toString(deleted)) .param("sequenceId", Integer.toString(sequence)) .param("primaryBundle_id", bundle.getID().toString()) @@ -226,8 +226,8 @@ public void importBitstreamForExistingFileAsPrimaryBitstreamOfBundleTest() throw .andReturn().getResponse().getContentAsString(), "$.id")); - checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getID(), sequence, deleted, sizeBytes, - checkSum); + checkCreatedBitstream(uuid, internalId, storeNumber, bitstreamFormat.getMIMEType(), sequence, deleted, + sizeBytes, checkSum); bundle = bundleService.find(context, bundle.getID()); assertEquals(bundle.getPrimaryBitstream().getID(), bitstream.getID()); @@ -257,7 +257,7 @@ public void importBitstreamForExistingFileValidationErrorTest() throws Exception .contentType(contentType) .param("internal_id", internalId) .param("storeNumber", Integer.toString(storeNumber)) - .param("bitstreamFormat", Integer.toString(bitstreamFormat.getID())) + .param("bitstreamFormat", bitstreamFormat.getMIMEType()) .param("deleted", Boolean.toString(deleted)) .param("sequenceId", Integer.toString(sequence)) .param("primaryBundle_id", "") @@ -271,12 +271,12 @@ public void importBitstreamForExistingFileValidationErrorTest() throws Exception } private void checkCreatedBitstream(UUID uuid, String internalId, int storeNumber, - Integer bitstreamFormat, int sequence, boolean deleted, long sizeBytes, + String bitstreamFormat, int sequence, boolean deleted, long sizeBytes, String checkSum) throws SQLException { bitstream = bitstreamService.find(context, uuid); assertEquals(bitstream.getChecksum(), checkSum); assertEquals(bitstream.getSizeBytes(), sizeBytes); - assertEquals(bitstream.getFormat(context).getID(), bitstreamFormat); + assertEquals(bitstream.getFormat(context).getMIMEType(), bitstreamFormat); assertEquals(bitstream.getInternalId(), internalId); assertEquals(bitstream.getStoreNumber(), storeNumber); assertEquals(bitstream.getSequenceID(), sequence); From 7d1fdb868d689f29c6163fbb28218082f073b150 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 23 Nov 2023 10:01:16 +0100 Subject: [PATCH 03/19] ufal/update-canonical-prefix-to-hdl (#460) * Updated `handle.canonical.prefix` to hdl handle. * Integration test cannot have changed handle.canonical.prefix. --- dspace-api/src/test/data/dspaceFolder/config/local.cfg | 2 ++ dspace/config/dspace.cfg | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index 84f89de4398f..c4c61bf77ff6 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -271,3 +271,5 @@ lr.pid.community.configurations = community=*, prefix=123456789, type=local, can #### Authority configuration `authority.cfg` authority.controlled.dc.relation = true + +handle.canonical.prefix = ${dspace.ui.url}/handle/ \ No newline at end of file diff --git a/dspace/config/dspace.cfg b/dspace/config/dspace.cfg index 20e010d72bf2..40ab09083b41 100644 --- a/dspace/config/dspace.cfg +++ b/dspace/config/dspace.cfg @@ -305,7 +305,7 @@ identifier.doi.namespaceseparator = dspace/ # # Items in DSpace receive a unique URL, stored in dc.identifier.uri # after it is generated during the submission process. -handle.canonical.prefix = ${dspace.ui.url}/handle/ +handle.canonical.prefix = http://hdl.handle.net/ # If you register with CNRI's handle service at https://www.handle.net/, # these links can be generated as permalinks using https://hdl.handle.net/ From 29880ed71c5765bfcd1126090570c983bbdd1e47 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Fri, 1 Dec 2023 12:45:40 +0100 Subject: [PATCH 04/19] ufal/be-user-registration-missing (#463) * The user registration is added when the eperson is created by the ui. * try using newer checkout, same as upstream, since error is with git --------- Co-authored-by: MajoBerger <88670521+MajoBerger@users.noreply.github.com> --- .../clarin/ClarinUserRegistration.java | 4 ++ .../repository/EPersonRestRepository.java | 15 +++++++ .../app/rest/EPersonRestRepositoryIT.java | 45 ++++++++++++++++++- 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/main/java/org/dspace/content/clarin/ClarinUserRegistration.java b/dspace-api/src/main/java/org/dspace/content/clarin/ClarinUserRegistration.java index a3e3666a01f4..8c8fd8def9b6 100644 --- a/dspace-api/src/main/java/org/dspace/content/clarin/ClarinUserRegistration.java +++ b/dspace-api/src/main/java/org/dspace/content/clarin/ClarinUserRegistration.java @@ -28,8 +28,12 @@ @Table(name = "user_registration") public class ClarinUserRegistration implements ReloadableEntity { + // Anonymous user public static final String ANONYMOUS_USER_REGISTRATION = "anonymous"; + // Registered user without organization + public static final String UNKNOWN_USER_REGISTRATION = "Unknown"; + private static Logger log = org.apache.logging.log4j.LogManager.getLogger(ClarinUserRegistration.class); @Id diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/EPersonRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/EPersonRestRepository.java index 8dd6ed90b1b0..2d381a6abb55 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/EPersonRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/EPersonRestRepository.java @@ -7,6 +7,8 @@ */ package org.dspace.app.rest.repository; +import static org.dspace.content.clarin.ClarinUserRegistration.UNKNOWN_USER_REGISTRATION; + import java.io.IOException; import java.sql.SQLException; import java.util.Arrays; @@ -35,6 +37,8 @@ import org.dspace.authorize.AuthorizeException; import org.dspace.authorize.service.AuthorizeService; import org.dspace.authorize.service.ValidatePasswordService; +import org.dspace.content.clarin.ClarinUserRegistration; +import org.dspace.content.service.clarin.ClarinUserRegistrationService; import org.dspace.core.Context; import org.dspace.eperson.EPerson; import org.dspace.eperson.EmptyWorkflowGroupException; @@ -79,6 +83,9 @@ public class EPersonRestRepository extends DSpaceObjectRestRepository idRef = new AtomicReference(); AtomicReference idRefNoEmbeds = new AtomicReference(); + AtomicReference idRefUserDataReg = new AtomicReference(); + AtomicReference idRefUserDataFullReg = new AtomicReference(); String authToken = getAuthToken(admin.getEmail(), password); @@ -155,11 +158,51 @@ public void createTest() throws Exception { .andExpect(content().contentType(contentType)) .andExpect(jsonPath("$", HalMatcher.matchNoEmbeds())) .andDo(result -> idRefNoEmbeds - .set(UUID.fromString(read(result.getResponse().getContentAsString(), "$.id"))));; + .set(UUID.fromString(read(result.getResponse().getContentAsString(), "$.id")))); + + // Check that the user registration for test data user has been created + getClient(authToken).perform(get("/api/core/clarinuserregistration/search/byEPerson") + .param("userUUID", String.valueOf(idRef.get())) + .contentType(contentType)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.page.totalElements", is(1))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].id", is(not(empty())))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].email", is("createtest@example.com"))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].confirmation", is(true))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].ePersonID", is(idRef.get().toString()))) + .andDo(result -> idRefUserDataReg + .set(read(result.getResponse().getContentAsString(), + "$._embedded.clarinuserregistrations[0].id"))); + + // Check that the user registration for test data full user has been created + getClient(authToken).perform(get("/api/core/clarinuserregistration/search/byEPerson") + .param("userUUID", String.valueOf(idRefNoEmbeds.get())) + .contentType(contentType)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.page.totalElements", is(1))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].id", is(not(empty())))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].email", + is("createtestfull@example.com"))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].confirmation", is(true))) + .andExpect(jsonPath( + "$._embedded.clarinuserregistrations[0].ePersonID", + is(idRefNoEmbeds.get().toString()))) + .andDo(result -> idRefUserDataFullReg + .set(read(result.getResponse().getContentAsString(), + "$._embedded.clarinuserregistrations[0].id"))); } finally { EPersonBuilder.deleteEPerson(idRef.get()); EPersonBuilder.deleteEPerson(idRefNoEmbeds.get()); + ClarinUserRegistrationBuilder.deleteClarinUserRegistration(idRefUserDataReg.get()); + ClarinUserRegistrationBuilder.deleteClarinUserRegistration(idRefUserDataFullReg.get()); } } From e2322c5de2cea49940c3df6772530e8300206af1 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Fri, 1 Dec 2023 12:46:41 +0100 Subject: [PATCH 05/19] ufal/be-license-download-statistics (#462) * Add logging of downloading restricted bitstreams. * Log downloading of every bitstream not only restricted ones. * Added docs * Refactored logging. --- .../clarin/ClarinMatomoBitstreamTracker.java | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/dspace-api/src/main/java/org/dspace/app/statistics/clarin/ClarinMatomoBitstreamTracker.java b/dspace-api/src/main/java/org/dspace/app/statistics/clarin/ClarinMatomoBitstreamTracker.java index f3b6dc9ea4c4..08287caa6ce8 100644 --- a/dspace-api/src/main/java/org/dspace/app/statistics/clarin/ClarinMatomoBitstreamTracker.java +++ b/dspace-api/src/main/java/org/dspace/app/statistics/clarin/ClarinMatomoBitstreamTracker.java @@ -8,6 +8,7 @@ package org.dspace.app.statistics.clarin; import java.sql.SQLException; +import java.text.MessageFormat; import java.util.List; import java.util.Objects; import javax.servlet.http.HttpServletRequest; @@ -22,6 +23,7 @@ import org.dspace.content.service.ItemService; import org.dspace.content.service.clarin.ClarinItemService; import org.dspace.core.Context; +import org.dspace.eperson.EPerson; import org.dspace.services.ConfigurationService; import org.dspace.services.factory.DSpaceServicesFactory; import org.matomo.java.tracking.CustomVariable; @@ -132,6 +134,24 @@ public void trackBitstreamDownload(Context context, HttpServletRequest request, return; } + // Log the user which is downloading the bitstream + this.logUserDownloadingBitstream(context, bit); + // Track the bitstream downloading event trackPage(context, request, item, "Bitstream Download / Single File"); } + + /** + * Log the user which is downloading the bitstream + * @param context DSpace context object + * @param bit Bitstream which is downloading + */ + private void logUserDownloadingBitstream(Context context, Bitstream bit) { + EPerson eperson = context.getCurrentUser(); + String pattern = "The user name: {0}, uuid: {1} is downloading bitstream name: {2}, uuid: {3}."; + String logMessage = Objects.isNull(eperson) + ? MessageFormat.format(pattern, "ANONYMOUS", "null", bit.getName(), bit.getID()) + : MessageFormat.format(pattern, eperson.getFullName(), eperson.getID(), bit.getName(), bit.getID()); + + log.info(logMessage); + } } From c01af03e588ec447bc0fd10b21391cbb1f52e9ea Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 6 Dec 2023 07:30:58 +0100 Subject: [PATCH 06/19] ufal/be-shibboleth-headers-encoding (#464) * givenname and last name values are converted to UTF-8 encoding * Delete eperson in tests --- .../clarin/ClarinShibAuthentication.java | 8 +-- .../dspace/authenticate/clarin/Headers.java | 58 +++++++++++++++--- .../ClarinShibbolethLoginFilterIT.java | 61 +++++++++++++++++-- dspace/config/clarin-dspace.cfg | 4 ++ 4 files changed, 115 insertions(+), 16 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java b/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java index a10c291ff10f..d553d67308c6 100644 --- a/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java +++ b/dspace-api/src/main/java/org/dspace/authenticate/clarin/ClarinShibAuthentication.java @@ -695,8 +695,8 @@ protected EPerson registerNewEPerson(Context context, HttpServletRequest request // Header values String netid = Util.formatNetId(findSingleAttribute(request, netidHeader), org); String email = findSingleAttribute(request, emailHeader); - String fname = findSingleAttribute(request, fnameHeader); - String lname = findSingleAttribute(request, lnameHeader); + String fname = Headers.updateValueByCharset(findSingleAttribute(request, fnameHeader)); + String lname = Headers.updateValueByCharset(findSingleAttribute(request, lnameHeader)); // If the values are not in the request headers try to retrieve it from `shibheaders`. if (StringUtils.isEmpty(netid)) { @@ -817,8 +817,8 @@ protected void updateEPerson(Context context, HttpServletRequest request, EPerso String netid = Util.formatNetId(findSingleAttribute(request, netidHeader), shibheaders.get_idp()); String email = findSingleAttribute(request, emailHeader); - String fname = findSingleAttribute(request, fnameHeader); - String lname = findSingleAttribute(request, lnameHeader); + String fname = Headers.updateValueByCharset(findSingleAttribute(request, fnameHeader)); + String lname = Headers.updateValueByCharset(findSingleAttribute(request, lnameHeader)); // If the values are not in the request headers try to retrieve it from `shibheaders`. if (StringUtils.isEmpty(netid)) { diff --git a/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java b/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java index e683ac2e140d..3305661d194f 100644 --- a/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java +++ b/dspace-api/src/main/java/org/dspace/authenticate/clarin/Headers.java @@ -9,6 +9,7 @@ package org.dspace.authenticate.clarin; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Enumeration; import java.util.HashMap; @@ -16,8 +17,11 @@ import java.util.Map; import javax.servlet.http.HttpServletRequest; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.dspace.services.ConfigurationService; +import org.dspace.utils.DSpace; /** * Helper class for request headers. @@ -29,9 +33,11 @@ public class Headers { private static final Logger log = LogManager.getLogger(org.dspace.authenticate.clarin.Headers.class); // variables // + private static ConfigurationService configurationService = new DSpace().getConfigurationService(); private HashMap> headers_ = new HashMap>(); private String header_separator_ = null; + private static String EMPTY_STRING = ""; // ctors @@ -157,17 +163,53 @@ private List header2values(String header) { /** - * Convert ISO header value to UTF-8 - * @param value ISO header value String - * @return + * Convert ISO header value to UTF-8 or return UTF-8 value if it is not ISO. + * @param value ISO/UTF-8 header value String + * @return Converted ISO value to UTF-8 or UTF-8 value from input */ - private String updateValueByCharset(String value) { + public static String updateValueByCharset(String value) { + String inputEncoding = configurationService.getProperty("shibboleth.name.conversion.inputEncoding", + "ISO-8859-1"); + String outputEncoding = configurationService.getProperty("shibboleth.name.conversion.outputEncoding", + "UTF-8"); + + if (StringUtils.isBlank(value)) { + value = EMPTY_STRING; + } + + // If the value is not ISO-8859-1, then it is already UTF-8 + if (!isISOType(value)) { + return value; + } + try { - return new String(value.getBytes("ISO-8859-1"), "UTF-8"); + // Encode the string to UTF-8 + return new String(value.getBytes(inputEncoding), outputEncoding); } catch (UnsupportedEncodingException ex) { - log.warn("Failed to reconvert shibboleth attribute with value (" - + value + ").", ex); + log.warn("Cannot convert the value: " + value + " from " + inputEncoding + " to " + outputEncoding + + " because of: " + ex.getMessage()); + return value; + } + } + + /** + * Check if the value is ISO-8859-1 encoded. + * @param value String to check + * @return true if the value is ISO-8859-1 encoded, false otherwise + */ + private static boolean isISOType(String value) { + try { + // Encode the string to ISO-8859-1 + byte[] iso8859Bytes = value.getBytes(StandardCharsets.ISO_8859_1); + + // Decode the bytes back to a string using ISO-8859-1 + String decodedString = new String(iso8859Bytes, StandardCharsets.ISO_8859_1); + + // Compare the original string with the decoded string + return StringUtils.equals(value, decodedString); + } catch (Exception e) { + // An exception occurred, so the input is not ISO-8859-1 + return false; } - return value; } } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java index ba793b45a9c9..8b62e95bed79 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/security/ClarinShibbolethLoginFilterIT.java @@ -446,15 +446,18 @@ public void testRedirectToGivenUntrustedUrl() throws Exception { } @Test - public void testUTF8ShibHeaders() throws Exception { + public void testISOShibHeaders() throws Exception { + String testMail = "test@email.edu"; + String testIdp = IDP_TEST_EPERSON + "test"; + String testNetId = NET_ID_TEST_EPERSON + "000"; // NOTE: The initial call to /shibboleth comes *from* an external Shibboleth site. So, it is always // unauthenticated, but it must include some expected SHIB attributes. // SHIB-MAIL attribute is the default email header sent from Shibboleth after a successful login. // In this test we are simply mocking that behavior by setting it to an existing EPerson. String token = getClient().perform(get("/api/authn/shibboleth") - .header("SHIB-MAIL", clarinEperson.getEmail()) - .header("Shib-Identity-Provider", IDP_TEST_EPERSON) - .header("SHIB-NETID", NET_ID_TEST_EPERSON) + .header("SHIB-MAIL", testMail) + .header("Shib-Identity-Provider", testIdp) + .header("SHIB-NETID", testNetId) .header("SHIB-GIVENNAME", "knihovna KůÅ\u0088 test ŽluÅ¥ouÄ\u008Dký")) .andExpect(status().is3xxRedirection()) .andExpect(redirectedUrl("http://localhost:4000")) @@ -466,6 +469,56 @@ public void testUTF8ShibHeaders() throws Exception { .andExpect(jsonPath("$.authenticated", is(true))) .andExpect(jsonPath("$.authenticationMethod", is("shibboleth"))); + // Check if was created a user with such email and netid. + EPerson ePerson = ePersonService.findByNetid(context, Util.formatNetId(testNetId, testIdp)); + assertTrue(Objects.nonNull(ePerson)); + assertEquals(ePerson.getEmail(), testMail); + assertEquals(ePerson.getFirstName(), "knihovna Kůň test Žluťoučký"); + + EPersonBuilder.deleteEPerson(ePerson.getID()); + + getClient(token).perform( + get("/api/authz/authorizations/search/object") + .param("embed", "feature") + .param("feature", feature) + .param("uri", utils.linkToSingleResource(ePersonRest, "self").getHref())) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.page.totalElements", is(0))) + .andExpect(jsonPath("$._embedded").doesNotExist()); + } + + @Test + public void testUTF8ShibHeaders() throws Exception { + String testMail = "test@email.edu"; + String testIdp = IDP_TEST_EPERSON + "test"; + String testNetId = NET_ID_TEST_EPERSON + "000"; + // NOTE: The initial call to /shibboleth comes *from* an external Shibboleth site. So, it is always + // unauthenticated, but it must include some expected SHIB attributes. + // SHIB-MAIL attribute is the default email header sent from Shibboleth after a successful login. + // In this test we are simply mocking that behavior by setting it to an existing EPerson. + String token = getClient().perform(get("/api/authn/shibboleth") + .header("SHIB-MAIL", testMail) + .header("Shib-Identity-Provider", testIdp) + .header("SHIB-NETID", testNetId) + .header("SHIB-GIVENNAME", "knihovna Kůň test Žluťoučký")) + .andExpect(status().is3xxRedirection()) + .andExpect(redirectedUrl("http://localhost:4000")) + .andReturn().getResponse().getHeader("Authorization"); + + + getClient(token).perform(get("/api/authn/status")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.authenticated", is(true))) + .andExpect(jsonPath("$.authenticationMethod", is("shibboleth"))); + + // Check if was created a user with such email and netid. + EPerson ePerson = ePersonService.findByNetid(context, Util.formatNetId(testNetId, testIdp)); + assertTrue(Objects.nonNull(ePerson)); + assertEquals(ePerson.getEmail(), testMail); + assertEquals(ePerson.getFirstName(), "knihovna Kůň test Žluťoučký"); + + EPersonBuilder.deleteEPerson(ePerson.getID()); + getClient(token).perform( get("/api/authz/authorizations/search/object") .param("embed", "feature") diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 2c50811ec00a..3d317c77b41a 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -224,3 +224,7 @@ themed.by.company.name = dataquest s.r.o. #### Authority configuration `authority.cfg` ## dc.relation authority is configured only because of correct item importing, but it is not used anymore. authority.controlled.dc.relation = true + +#nameConversion +shibboleth.name.conversion.inputEncoding = ISO-8859-1 +shibboleth.name.conversion.outputEncoding = UTF-8 From 6eb3726bf52f7de73a995899bc6769239869aac1 Mon Sep 17 00:00:00 2001 From: MajoBerger Date: Wed, 6 Dec 2023 09:51:12 +0100 Subject: [PATCH 07/19] using our dspace-dependencies because of https://github.com/dataquest-dev/DSpace/issues/466 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 7a1a3fcff6ae..79cb24038666 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ ARG JDK_VERSION=11 # Step 1 - Run Maven Build -FROM dspace/dspace-dependencies:dspace-7_x as build +FROM dataquest/dspace-dependencies:dspace-7_x as build ARG TARGET_DIR=dspace-installer WORKDIR /app # The dspace-installer directory will be written to /install From 4b9e7fc07d285cad4ce064e770350e94cd09b453 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:18:52 +0100 Subject: [PATCH 08/19] ufal/be-fix-email-parameters Fixed emails sending - added arguments and cfg properties that are sent in the email. (#470) --- .../ClarinAutoRegistrationController.java | 12 ++++++- .../ClarinUserMetadataRestController.java | 18 ++++++++-- dspace/config/clarin-dspace.cfg | 6 ++++ dspace/config/emails/clarin_autoregistration | 35 +++++++++++++------ dspace/config/emails/clarin_download_link | 23 +++++++----- 5 files changed, 71 insertions(+), 23 deletions(-) diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinAutoRegistrationController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinAutoRegistrationController.java index 31e234ec2c0c..de2dccb9e866 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinAutoRegistrationController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinAutoRegistrationController.java @@ -84,12 +84,17 @@ public ResponseEntity sendEmail(HttpServletRequest request, HttpServletResponse return null; } + // Fetch DSpace main cfg info and send it in the email String uiUrl = configurationService.getProperty("dspace.ui.url"); + String helpDeskEmail = configurationService.getProperty("lr.help.mail", ""); + String helpDeskPhoneNum = configurationService.getProperty("lr.help.phone", ""); + String dspaceName = configurationService.getProperty("dspace.name", ""); + String dspaceNameShort = configurationService.getProperty("dspace.name.short", ""); + if (StringUtils.isEmpty(uiUrl)) { log.error("Cannot load the `dspace.ui.url` property from the cfg."); throw new RuntimeException("Cannot load the `dspace.ui.url` property from the cfg."); } - // Generate token and create ClarinVerificationToken record with the token and user email. String verificationToken = Utils.generateHexKey(); clarinVerificationToken.setEmail(email); @@ -103,6 +108,11 @@ public ResponseEntity sendEmail(HttpServletRequest request, HttpServletResponse Locale locale = context.getCurrentLocale(); Email bean = Email.getEmail(I18nUtil.getEmailFilename(locale, "clarin_autoregistration")); bean.addArgument(autoregistrationURL); + bean.addArgument(helpDeskEmail); + bean.addArgument(helpDeskPhoneNum); + bean.addArgument(dspaceNameShort); + bean.addArgument(dspaceName); + bean.addArgument(uiUrl); bean.addRecipient(email); bean.send(); } catch (Exception e) { diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ClarinUserMetadataRestController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ClarinUserMetadataRestController.java index 170e85bd6035..e726c600ce80 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ClarinUserMetadataRestController.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/ClarinUserMetadataRestController.java @@ -173,9 +173,18 @@ private void sendEmailWithDownloadLink(Context context, UUID bitstreamUUID, throw new BadRequestException("Cannot find the clarin license for the bitstream with ID: " + bitstreamUUID); } + // Fetch DSpace main cfg info and send it in the email + String uiUrl = configurationService.getProperty("dspace.ui.url", ""); + String helpDeskEmail = configurationService.getProperty("lr.help.mail", ""); + String helpDeskPhoneNum = configurationService.getProperty("lr.help.phone", ""); + String dspaceName = configurationService.getProperty("dspace.name", ""); + String dspaceNameShort = configurationService.getProperty("dspace.name.short", ""); + + if (StringUtils.isEmpty(uiUrl)) { + log.error("Cannot load the `dspace.ui.url` property from the cfg."); + throw new RuntimeException("Cannot load the `dspace.ui.url` property from the cfg."); + } // Compose download link - // Get UI url - String uiUrl = configurationService.getProperty("dspace.ui.url"); String downloadLink = uiUrl + "/" + BitstreamRest.PLURAL_NAME + "/" + bitstream.getID() + "/download?dtoken=" + downloadToken; @@ -185,6 +194,11 @@ private void sendEmailWithDownloadLink(Context context, UUID bitstreamUUID, bean.addArgument(bitstream.getName()); bean.addArgument(downloadLink); bean.addArgument(clarinLicense.getDefinition()); + bean.addArgument(helpDeskEmail); + bean.addArgument(helpDeskPhoneNum); + bean.addArgument(dspaceNameShort); + bean.addArgument(dspaceName); + bean.addArgument(uiUrl); bean.addRecipient(email); bean.send(); } catch (MessagingException e) { diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 3d317c77b41a..e51dca06ba8e 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -2,6 +2,12 @@ # one day similar to this # https://github.com/ufal/clarin-dspace/blob/clarin/utilities/project_helpers/config/local.conf.dist +#------------------------------------------------------------------# +#---------------------------DSpace---------------------------------# +#------------------------------------------------------------------# +dspace.name.short = DSpace +dspace.name = CLARIN DSpace + #------------------------------------------------------------------# #---------------------------UPLOAD FILE----------------------------# #------------------------------------------------------------------# diff --git a/dspace/config/emails/clarin_autoregistration b/dspace/config/emails/clarin_autoregistration index f39770cfd258..c05ba67d9e78 100644 --- a/dspace/config/emails/clarin_autoregistration +++ b/dspace/config/emails/clarin_autoregistration @@ -1,14 +1,27 @@ -## E-mail sent to confirm authenticity of the user's e-mail. -## -## Parameters: {0} confirmation e-mail -## -## See org.dspace.core.Email for information on the format of this file. -## +# E-mail sent to confirm authenticity of the user's e-mail. +# +# Parameters: {0} confirmation e-mail +# {1} helpdesk email +# {2} helpdesk phone number +# {3} DSpace name short +# {4} DSpace name +# {5} DSpace UI url +# +# See org.dspace.core.Email for information on the format of this file. +# +Subject: ${params[3]}: Account Registration +To complete registration for a ${params[3]} repository account at {params[5]}, please click the link below: -Confirmation e-mail: ${params[0]} + ${params[0]} + +If you need assistance with your account, please email +${params[1]} or call us at ${params[2]} + + +${params[3]} Team _____________________________________ -${dspace.name}, -WWW: ${dspace.url} -Email: ${lr.help.mail} -Tel.: ${lr.help.phone} +${params[4]}, +WWW: ${params[5]} +Email: ${params[1]} +Tel.: ${params[2]} diff --git a/dspace/config/emails/clarin_download_link b/dspace/config/emails/clarin_download_link index d44e18cca7c0..bf0bb380f3b5 100644 --- a/dspace/config/emails/clarin_download_link +++ b/dspace/config/emails/clarin_download_link @@ -3,24 +3,29 @@ # Parameters: {0} is expanded to filename # {1} to a download URL # {2} to license url +# {3} helpdesk email +# {4} helpdesk phone number +# {5} DSpace name short +# {6} DSpace name +# {7} DSpace UI url # # See org.dspace.core.Email for information on the format of this file. # -Subject: ${lr.dspace.name.short}: Download instructions for {0} +Subject: ${params[5]}: Download instructions for ${params[0]} To download the file you have requested, please click the link below: - {1} + ${params[1]} Remember, the file is distributed under specific license: - {2} + ${params[2]} If you have trouble downloading the file or if you did not request this download please contact -${lr.help.mail} or call us at ${lr.help.phone} +${params[3]} or call us at ${params[4]} -${lr.dspace.name.short} Team +${params[5]} Team _____________________________________ -${dspace.name}, -WWW: ${dspace.url} -Email: ${lr.help.mail} -Tel.: ${lr.help.phone} \ No newline at end of file +${params[6]}, +WWW: ${params[7]} +Email: ${params[3]} +Tel.: ${params[4]} \ No newline at end of file From 989e6e81c1cb905e8da93560b2308830cf1215ef Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:19:44 +0100 Subject: [PATCH 09/19] ufal/be-provenance-subbmitter-missing (#469) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * #8585 Add submitter information to provenance metadata * Cherry picked fix from vanilla and added test to check if the Item provenance metadata is added --------- Co-authored-by: Adán Román Ruiz --- .../xmlworkflow/XmlWorkflowServiceImpl.java | 25 ++++--- .../ClarinWorkflowItemRestRepositoryIT.java | 70 +++++++++++++++++++ 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowServiceImpl.java b/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowServiceImpl.java index da7910da29f2..51292fd4773a 100644 --- a/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/xmlworkflow/XmlWorkflowServiceImpl.java @@ -221,6 +221,8 @@ public XmlWorkflowItem start(Context context, WorkspaceItem wsi) //Get our next step, if none is found, archive our item firstStep = wf.getNextStep(context, wfi, firstStep, ActionResult.OUTCOME_COMPLETE); if (firstStep == null) { + // record the submitted provenance message + recordStart(context, wfi.getItem(),null); archive(context, wfi); } else { activateFirstStep(context, wf, firstStep, wfi); @@ -334,7 +336,7 @@ protected void activateFirstStep(Context context, Workflow wf, Step firstStep, X + "item_id=" + wfi.getItem().getID() + "collection_id=" + wfi.getCollection().getID())); - // record the start of the workflow w/provenance message +// record the start of the workflow w/provenance message recordStart(context, wfi.getItem(), firstActionConfig.getProcessingAction()); //Fire an event ! @@ -1187,25 +1189,30 @@ protected void recordStart(Context context, Item myitem, Action action) DCDate now = DCDate.getCurrent(); // Create provenance description - String provmessage = ""; + StringBuffer provmessage = new StringBuffer(); if (myitem.getSubmitter() != null) { - provmessage = "Submitted by " + myitem.getSubmitter().getFullName() - + " (" + myitem.getSubmitter().getEmail() + ") on " - + now.toString() + " workflow start=" + action.getProvenanceStartId() + "\n"; + provmessage.append("Submitted by ").append(myitem.getSubmitter().getFullName()) + .append(" (").append(myitem.getSubmitter().getEmail()).append(") on ") + .append(now.toString()); } else { // else, null submitter - provmessage = "Submitted by unknown (probably automated) on" - + now.toString() + " workflow start=" + action.getProvenanceStartId() + "\n"; + provmessage.append("Submitted by unknown (probably automated) on") + .append(now.toString()); + } + if (action != null) { + provmessage.append(" workflow start=").append(action.getProvenanceStartId()).append("\n"); + } else { + provmessage.append("\n"); } // add sizes and checksums of bitstreams - provmessage += installItemService.getBitstreamProvenanceMessage(context, myitem); + provmessage.append(installItemService.getBitstreamProvenanceMessage(context, myitem)); // Add message to the DC itemService .addMetadata(context, myitem, MetadataSchemaEnum.DC.getName(), - "description", "provenance", "en", provmessage); + "description", "provenance", "en", provmessage.toString()); itemService.update(context, myitem); } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinWorkflowItemRestRepositoryIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinWorkflowItemRestRepositoryIT.java index 2181bedcf08c..20ec8dc38f24 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinWorkflowItemRestRepositoryIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinWorkflowItemRestRepositoryIT.java @@ -10,13 +10,16 @@ import static com.jayway.jsonpath.JsonPath.read; import static com.jayway.jsonpath.matchers.JsonPathMatchers.hasJsonPath; import static org.dspace.app.rest.matcher.MetadataMatcher.matchMetadata; +import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.junit.Assert.assertFalse; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; +import java.util.List; import java.util.UUID; import java.util.concurrent.atomic.AtomicReference; @@ -24,17 +27,22 @@ import org.dspace.app.rest.test.AbstractControllerIntegrationTest; import org.dspace.builder.CollectionBuilder; import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.EPersonBuilder; import org.dspace.builder.ItemBuilder; import org.dspace.builder.VersionBuilder; import org.dspace.builder.WorkspaceItemBuilder; import org.dspace.content.Collection; import org.dspace.content.Item; +import org.dspace.content.MetadataValue; +import org.dspace.content.WorkspaceItem; import org.dspace.content.service.ItemService; import org.dspace.content.service.WorkspaceItemService; +import org.dspace.eperson.EPerson; import org.dspace.license.service.CreativeCommonsService; import org.dspace.services.ConfigurationService; import org.dspace.xmlworkflow.factory.XmlWorkflowFactory; import org.dspace.xmlworkflow.storedcomponents.service.CollectionRoleService; +import org.dspace.xmlworkflow.storedcomponents.service.XmlWorkflowItemService; import org.hamcrest.Matchers; import org.junit.Assert; import org.junit.Before; @@ -64,6 +72,8 @@ public class ClarinWorkflowItemRestRepositoryIT extends AbstractControllerIntegr @Autowired private CreativeCommonsService creativeCommonsService; + @Autowired + private XmlWorkflowItemService xmlWorkflowItemService; @Autowired private ItemService itemService; @@ -250,4 +260,64 @@ public void shouldAddNewHandleToItemMetadata() throws Exception { WorkspaceItemBuilder.deleteWorkspaceItem(idWorkspaceItemRef.get()); } } + + + @Test + public void shouldCreateProvenanceMessageOnItemSubmit() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community with one collection. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + + //2. create a normal user to use as submitter + EPerson submitter = EPersonBuilder.createEPerson(context) + .withEmail("submitter@example.com") + .withPassword("dspace") + .build(); + + // Submitter group - allow deposit a new item without workflow + Collection col1 = CollectionBuilder.createCollection(context, parentCommunity) + .withName("Collection 2") + .withSubmitterGroup(submitter) + .build(); + context.setCurrentUser(submitter); + + //3. a workspace item + WorkspaceItem wsitem = WorkspaceItemBuilder.createWorkspaceItem(context, col1) + .withTitle("Submission Item") + .withIssueDate("2017-10-17") + .grantLicense() + .build(); + + context.restoreAuthSystemState(); + + // get the submitter auth token + String authToken = getAuthToken(submitter.getEmail(), "dspace"); + + // submit the workspaceitem to start the workflow + getClient(authToken) + .perform(post(BASE_REST_SERVER_URL + "/api/workflow/workflowitems") + .content("/api/submission/workspaceitems/" + wsitem.getID()) + .contentType(textUriContentType)) + .andExpect(status().isCreated()); + + // Load deposited item and check the provenance metadata + Item depositedItem = itemService.find(context, wsitem.getItem().getID()); + List mvList = itemService.getMetadata(depositedItem, "dc", "description", + "provenance", Item.ANY); + assertFalse(mvList.isEmpty()); + + // Check if the provenance contains the submitter info + boolean containsSubmitterProvenance = false; + for (MetadataValue mv: mvList) { + if (mv.getValue().contains("Submitted by " + submitter.getEmail())) { + containsSubmitterProvenance = true; + break; + } + } + assertThat(containsSubmitterProvenance, is(true)); + } } From 85225d27a042d597427d1a165f4b2fed605740e9 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:29:40 +0100 Subject: [PATCH 10/19] ufal/be-get-user-ip-address (#468) * Created endpoint for fetching IP address with tests. * Made the code more understable --- .../app/rest/ClarinUserInfoController.java | 56 +++++++++++++++++++ .../app/rest/ClarinUserInfoControllerIT.java | 34 +++++++++++ 2 files changed, 90 insertions(+) create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinUserInfoController.java create mode 100644 dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinUserInfoControllerIT.java diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinUserInfoController.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinUserInfoController.java new file mode 100644 index 000000000000..fc4bf5f19310 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/ClarinUserInfoController.java @@ -0,0 +1,56 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import java.io.IOException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.commons.lang3.StringUtils; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; +import org.springframework.web.bind.annotation.RestController; + +/** + * This class is a REST controller that returns information about the client user. + * E.g. the client's IP address. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@RequestMapping(value = "/api/userinfo") +@RestController +public class ClarinUserInfoController { + + private final ObjectMapper objectMapper = new ObjectMapper(); + /** + * This method returns the client's IP address. + * @param request The HttpServletRequest object. + * @return The client's IP address. + */ + @RequestMapping(method = RequestMethod.GET, path = "/ipaddress") + public ResponseEntity getUserIPAddress(HttpServletRequest request, HttpServletResponse response) + throws IOException { + // Get client's IP address + String ipAddress = request.getRemoteAddr(); + if (StringUtils.isBlank(ipAddress)) { + String errorMessage = "Cannot get user's IP address"; + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, errorMessage); + return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorMessage); + } + + // Create JSON object using Jackson's ObjectNode + ObjectNode jsonObject = objectMapper.createObjectNode(); + jsonObject.put("ipAddress", ipAddress); + + return ResponseEntity.ok().body(jsonObject); + } +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinUserInfoControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinUserInfoControllerIT.java new file mode 100644 index 000000000000..be32cc55a656 --- /dev/null +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinUserInfoControllerIT.java @@ -0,0 +1,34 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.content; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +import javax.ws.rs.core.MediaType; + +import org.dspace.app.rest.test.AbstractControllerIntegrationTest; +import org.junit.Test; + +/** + * This class test the REST controller that returns information about the client user. + * E.g. the client's IP address. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class ClarinUserInfoControllerIT extends AbstractControllerIntegrationTest { + + @Test + public void getUserIPAddress() throws Exception { + getClient().perform(get("/api/userinfo/ipaddress") + .contentType(MediaType.APPLICATION_JSON)) + .andExpect(status().isOk()) + .andExpect(content().json("{\"ipAddress\":\"127.0.0.1\"}")); + } +} From f85e76ad60759be44f094dd4bb2978bfa1eb7c0b Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:30:01 +0100 Subject: [PATCH 11/19] ufal/publisher-ok-fix (#473) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fixes ufal/clarin-dspace#1096 (#471) * Added publisher filter in the integration test check --------- Co-authored-by: Ondřej Košarko --- .../app/rest/ClarinDiscoveryRestControllerIT.java | 1 + .../dspace/app/rest/matcher/SearchFilterMatcher.java | 11 +++++++++++ dspace/config/spring/api/discovery.xml | 12 ++++++++++++ 3 files changed, 24 insertions(+) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoveryRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoveryRestControllerIT.java index 95051697a462..72b12c7962af 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoveryRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/ClarinDiscoveryRestControllerIT.java @@ -984,6 +984,7 @@ public void discoverSearchTest() throws Exception { SearchFilterMatcher.authorFilter(), SearchFilterMatcher.subjectFilter(), // SearchFilterMatcher.dateIssuedFilter(), + SearchFilterMatcher.publisherFilter(), SearchFilterMatcher.hasContentInOriginalBundleFilter(), SearchFilterMatcher.hasFileNameInOriginalBundleFilter(), SearchFilterMatcher.hasFileDescriptionInOriginalBundleFilter(), diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/SearchFilterMatcher.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/SearchFilterMatcher.java index 2d38aeb12968..7700d5291249 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/SearchFilterMatcher.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/SearchFilterMatcher.java @@ -195,4 +195,15 @@ public static Matcher clarinItemsTypeFilter() { checkOperators() ); } + + public static Matcher publisherFilter() { + return allOf( + hasJsonPath("$.filter", is("publisher")), + hasJsonPath("$.hasFacets", is(false)), + hasJsonPath("$.type", is("text")), + hasJsonPath("$.openByDefault", is(false)), + checkOperators() + + ); + } } diff --git a/dspace/config/spring/api/discovery.xml b/dspace/config/spring/api/discovery.xml index 9b8b8351fe55..d40242314260 100644 --- a/dspace/config/spring/api/discovery.xml +++ b/dspace/config/spring/api/discovery.xml @@ -200,6 +200,7 @@ + @@ -2488,6 +2489,17 @@ + + + + + dc.publisher + + + + + + From 27abb8aa8deaceea8318109166096cf06eb4f2ec Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Fri, 15 Dec 2023 14:04:21 +0100 Subject: [PATCH 12/19] ufal/zip-preview-configurable (#475) * Made previewing of the file configurable. * The default value of the previewing the file must be true in the integration tests. --- .../test/data/dspaceFolder/config/local.cfg | 6 ++- .../MetadataBitstreamRestRepository.java | 11 ++++++ .../MetadataBitstreamRestRepositoryIT.java | 38 +++++++++++++++++++ dspace/config/clarin-dspace.cfg | 4 ++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index c4c61bf77ff6..de6caf8880f7 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -272,4 +272,8 @@ lr.pid.community.configurations = community=*, prefix=123456789, type=local, can #### Authority configuration `authority.cfg` authority.controlled.dc.relation = true -handle.canonical.prefix = ${dspace.ui.url}/handle/ \ No newline at end of file +handle.canonical.prefix = ${dspace.ui.url}/handle/ + +### File preview ### +# File preview is enabled by default +file.preview.enabled = true diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java index 8208a662ce79..d2daabe9a4da 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/MetadataBitstreamRestRepository.java @@ -55,6 +55,7 @@ import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.handle.service.HandleService; +import org.dspace.services.ConfigurationService; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.dspace.util.FileInfo; import org.dspace.util.FileTreeViewGenerator; @@ -98,6 +99,9 @@ public class MetadataBitstreamRestRepository extends DSpaceRestRepository findByHandle(@Parameter(value = "handle", required = true) String handle, @Parameter(value = "fileGrpType") String fileGrpType, @@ -398,6 +402,13 @@ public String getFileContent(InputStream inputStream) throws IOException { */ private boolean findOutCanPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException { try { + // Check it is allowed by configuration + boolean isAllowedByCfg = configurationService.getBooleanProperty("file.preview.enabled", true); + if (!isAllowedByCfg) { + return false; + } + + // Check it is allowed by license authorizeService.authorizeAction(context, bitstream, Constants.READ); return true; } catch (MissingLicenseAgreementException e) { diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamRestRepositoryIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamRestRepositoryIT.java index 94f98998b8d3..59e39285bea1 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamRestRepositoryIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/MetadataBitstreamRestRepositoryIT.java @@ -33,6 +33,7 @@ import org.dspace.content.Item; import org.dspace.content.service.clarin.ClarinLicenseResourceMappingService; import org.dspace.core.Constants; +import org.dspace.services.ConfigurationService; import org.dspace.util.FileTreeViewGenerator; import org.hamcrest.Matchers; import org.junit.Before; @@ -57,6 +58,9 @@ public class MetadataBitstreamRestRepositoryIT extends AbstractControllerIntegra @Autowired AuthorizeService authorizeService; + @Autowired + ConfigurationService configurationService; + @Before public void setup() throws Exception { context.turnOffAuthorisationSystem(); @@ -125,6 +129,40 @@ public void findByHandle() throws Exception { } + @Test + public void previewingIsDisabledByCfg() throws Exception { + boolean canPreview = configurationService.getBooleanProperty("file.preview.enabled", true); + // Disable previewing + configurationService.setProperty("file.preview.enabled", false); + // There is no restriction, so the user could preview the file + getClient().perform(get(METADATABITSTREAM_SEARCH_BY_HANDLE_ENDPOINT) + .param("handle", publicItem.getHandle()) + .param("fileGrpType", FILE_GRP_TYPE)) + .andExpect(status().isOk()) + .andExpect(content().contentType(contentType)) + .andExpect(jsonPath("$._embedded.metadatabitstreams").exists()) + .andExpect(jsonPath("$._embedded.metadatabitstreams").isArray()) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].name") + .value(Matchers.containsInAnyOrder(Matchers.containsString("Bitstream")))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].description") + .value(Matchers.containsInAnyOrder(Matchers.containsString(bts.getFormatDescription(context))))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].format") + .value(Matchers.containsInAnyOrder(Matchers.containsString( + bts.getFormat(context).getMIMEType())))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].fileSize") + .value(Matchers.containsInAnyOrder(Matchers.containsString( + FileTreeViewGenerator.humanReadableFileSize(bts.getSizeBytes()))))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].canPreview") + .value(Matchers.containsInAnyOrder(Matchers.is(false)))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].fileInfo").exists()) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].checksum") + .value(Matchers.containsInAnyOrder(Matchers.containsString(bts.getChecksum())))) + .andExpect(jsonPath("$._embedded.metadatabitstreams[*].href") + .value(Matchers.containsInAnyOrder(Matchers.containsString(url)))); + + configurationService.setProperty("file.preview.enabled", canPreview); + } + @Test public void findByHandleEmptyFileGrpType() throws Exception { getClient().perform(get(METADATABITSTREAM_SEARCH_BY_HANDLE_ENDPOINT) diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index e51dca06ba8e..0064162af6ad 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -234,3 +234,7 @@ authority.controlled.dc.relation = true #nameConversion shibboleth.name.conversion.inputEncoding = ISO-8859-1 shibboleth.name.conversion.outputEncoding = UTF-8 + +### File preview ### +# File preview is enabled by default +file.preview.enabled = false From de08f2612b2acdea7349b983ef7c2f5060c72f0d Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Fri, 15 Dec 2023 14:04:50 +0100 Subject: [PATCH 13/19] ufal/fe-oversized-file-upload-message Exposed max file limit for upload from the cfg. (#477) --- dspace/config/modules/rest.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index 48a543163464..344ce8cecdf0 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -61,6 +61,7 @@ rest.properties.exposed = statistics.cache-server.uri rest.properties.exposed = themed.by.url rest.properties.exposed = themed.by.company.name rest.properties.exposed = identifier.doi.resolver +rest.properties.exposed = spring.servlet.multipart.max-file-size # TUL rest.properties.exposed = dspace.ui.url rest.properties.exposed = versioning.item.history.include.submitter From bad4a91501ac98f76490aa36105aba4bdca6da41 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 21 Dec 2023 09:19:40 +0100 Subject: [PATCH 14/19] ufal/be-email-restricted-download * Fixed comments in the clarin emails (#484) --- dspace/config/emails/clarin_autoregistration | 22 ++++++++--------- dspace/config/emails/clarin_download_link | 26 ++++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/dspace/config/emails/clarin_autoregistration b/dspace/config/emails/clarin_autoregistration index c05ba67d9e78..694151db102c 100644 --- a/dspace/config/emails/clarin_autoregistration +++ b/dspace/config/emails/clarin_autoregistration @@ -1,14 +1,14 @@ -# E-mail sent to confirm authenticity of the user's e-mail. -# -# Parameters: {0} confirmation e-mail -# {1} helpdesk email -# {2} helpdesk phone number -# {3} DSpace name short -# {4} DSpace name -# {5} DSpace UI url -# -# See org.dspace.core.Email for information on the format of this file. -# +## E-mail sent to confirm authenticity of the user's e-mail. +## +## Parameters: {0} confirmation e-mail +## {1} helpdesk email +## {2} helpdesk phone number +## {3} DSpace name short +## {4} DSpace name +## {5} DSpace UI url +## +## See org.dspace.core.Email for information on the format of this file. +## Subject: ${params[3]}: Account Registration To complete registration for a ${params[3]} repository account at {params[5]}, please click the link below: diff --git a/dspace/config/emails/clarin_download_link b/dspace/config/emails/clarin_download_link index bf0bb380f3b5..dfc9e07df4a6 100644 --- a/dspace/config/emails/clarin_download_link +++ b/dspace/config/emails/clarin_download_link @@ -1,16 +1,16 @@ -# E-mail with a download link -# -# Parameters: {0} is expanded to filename -# {1} to a download URL -# {2} to license url -# {3} helpdesk email -# {4} helpdesk phone number -# {5} DSpace name short -# {6} DSpace name -# {7} DSpace UI url -# -# See org.dspace.core.Email for information on the format of this file. -# +## E-mail with a download link +## +## Parameters: {0} is expanded to filename +## {1} to a download URL +## {2} to license url +## {3} helpdesk email +## {4} helpdesk phone number +## {5} DSpace name short +## {6} DSpace name +## {7} DSpace UI url +## +## See org.dspace.core.Email for information on the format of this file. +## Subject: ${params[5]}: Download instructions for ${params[0]} To download the file you have requested, please click the link below: ${params[1]} From 7b4bf52165c3d853a602dc7c21267e2a5a16fa32 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 21 Dec 2023 12:12:48 +0100 Subject: [PATCH 15/19] ufal/be-not-show-shib-welcome-page (#485) * Added cfg property to enable/disable showing of the page with attributes passed from the idp. --- dspace/config/clarin-dspace.cfg | 4 ++++ dspace/config/modules/rest.cfg | 2 ++ 2 files changed, 6 insertions(+) diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 0064162af6ad..fe7fd16fbea0 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -111,6 +111,10 @@ authentication-shibboleth.role.ufal.mff.cuni.cz = UFAL # Authorization is assigned to the user in the `EPersonRestAuthenticationProvider` class. authentication-shibboleth.clarin.custom.groups = UFAL,UFAL_MEMBER +# Show attributes which are passed by IdP on the first login +# Default - false +authentication-shibboleth.show.idp-attributes = false + ############### # diff --git a/dspace/config/modules/rest.cfg b/dspace/config/modules/rest.cfg index 344ce8cecdf0..c612e4286a8f 100644 --- a/dspace/config/modules/rest.cfg +++ b/dspace/config/modules/rest.cfg @@ -62,6 +62,8 @@ rest.properties.exposed = themed.by.url rest.properties.exposed = themed.by.company.name rest.properties.exposed = identifier.doi.resolver rest.properties.exposed = spring.servlet.multipart.max-file-size +rest.properties.exposed = authentication-shibboleth.show.idp-attributes + # TUL rest.properties.exposed = dspace.ui.url rest.properties.exposed = versioning.item.history.include.submitter From 3f2ecbb00a4ceca77b09ac83d97e1a4709daa32a Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Thu, 21 Dec 2023 17:30:25 +0100 Subject: [PATCH 16/19] ufal/be-s3-customization (#481) * The bitstream data are stored in to local store after uploading to S3 * The bitstream is removed from the S3 and local assetstore * Store number is specific if all storages are synchronized. * Return store number in the BitstreamRest object * Find out which store is used - it could be synchronized stores number. * Constant is moved to the Bitstream class * Synchronization of storages is not allowed by default - set up it in the test environment. * Added docs * Removed constant from the Bitstream class - it wasn't consistent * Overriden BitstreamStorageServiceImpl by custom SyncBitstreamStorageServiceImpl * Removed ClarinS3BitStoreService.java to SyncS3BitStoreService * Added doc and refactoring. --- .../storage/bitstore/S3BitStoreService.java | 6 +- .../SyncBitstreamStorageServiceImpl.java | 330 ++++++++++++++++++ .../bitstore/SyncS3BitStoreService.java | 101 ++++++ .../test/data/dspaceFolder/config/local.cfg | 3 + .../rest/converter/BitstreamConverter.java | 1 + .../dspace/app/rest/model/BitstreamRest.java | 10 + dspace/config/clarin-dspace.cfg | 4 + dspace/config/spring/api/bitstore.xml | 4 +- 8 files changed, 454 insertions(+), 5 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index ad6c431aed9e..1ad4c33f8213 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -73,7 +73,7 @@ public class S3BitStoreService extends BaseBitStoreService { /** * Checksum algorithm */ - private static final String CSA = "MD5"; + protected static final String CSA = "MD5"; // These settings control the way an identifier is hashed into // directory and file names @@ -110,13 +110,13 @@ public class S3BitStoreService extends BaseBitStoreService { /** * S3 service */ - private AmazonS3 s3Service = null; + protected AmazonS3 s3Service = null; /** * S3 transfer manager * this is reused between put calls to use less resources for multiple uploads */ - private TransferManager tm = null; + protected TransferManager tm = null; private static final ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java new file mode 100644 index 000000000000..e48487955209 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java @@ -0,0 +1,330 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nullable; + +import org.apache.commons.collections4.MapUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.authorize.AuthorizeException; +import org.dspace.content.Bitstream; +import org.dspace.core.Context; +import org.dspace.core.Utils; +import org.dspace.services.ConfigurationService; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * This class is customization of the BitstreamStorageServiceImpl class. + * The bitstream is synchronized if it is stored in both S3 and local assetstore. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class SyncBitstreamStorageServiceImpl extends BitstreamStorageServiceImpl { + + /** + * log4j log + */ + private static final Logger log = LogManager.getLogger(); + private boolean syncEnabled = false; + + private static final int SYNCHRONIZED_STORES_NUMBER = 77; + + @Autowired + ConfigurationService configurationService; + + public SyncBitstreamStorageServiceImpl() { + super(); + } + + @Override + public void afterPropertiesSet() throws Exception { + for (Map.Entry storeEntry : getStores().entrySet()) { + if (storeEntry.getValue().isEnabled() && !storeEntry.getValue().isInitialized()) { + storeEntry.getValue().init(); + } + } + this.syncEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); + } + + @Override + public UUID store(Context context, Bitstream bitstream, InputStream is) throws SQLException, IOException { + // Create internal ID + String id = Utils.generateKey(); + /* + * Set the store number of the new bitstream If you want to use some + * other method of working out where to put a new bitstream, here's + * where it should go + */ + if (syncEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(getIncoming()); + } + bitstream.setDeleted(true); + bitstream.setInternalId(id); + + + BitStoreService store = this.getStore(getIncoming()); + //For efficiencies sake, PUT is responsible for setting bitstream size_bytes, checksum, and checksum_algorithm + store.put(bitstream, is); + //bitstream.setSizeBytes(file.length()); + //bitstream.setChecksum(Utils.toHex(dis.getMessageDigest().digest())); + //bitstream.setChecksumAlgorithm("MD5"); + + bitstream.setDeleted(false); + try { + //Update our bitstream but turn off the authorization system since permissions haven't been set at this + // point in time. + context.turnOffAuthorisationSystem(); + bitstreamService.update(context, bitstream); + } catch (AuthorizeException e) { + log.error(e); + //Can never happen since we turn off authorization before we update + } finally { + context.restoreAuthSystemState(); + } + + UUID bitstreamId = bitstream.getID(); + + if (log.isDebugEnabled()) { + log.debug("Stored bitstreamID " + bitstreamId); + } + + return bitstreamId; + } + + /** + * Register a bitstream already in storage. + * + * @param context The current context + * @param assetstore The assetstore number for the bitstream to be + * registered + * @param bitstreamPath The relative path of the bitstream to be registered. + * The path is relative to the path of ths assetstore. + * @return The ID of the registered bitstream + * @throws SQLException If a problem occurs accessing the RDBMS + * @throws IOException if IO error + */ + @Override + public UUID register(Context context, Bitstream bitstream, int assetstore, + String bitstreamPath) throws SQLException, IOException, AuthorizeException { + + // mark this bitstream as a registered bitstream + String sInternalId = REGISTERED_FLAG + bitstreamPath; + + // Create a deleted bitstream row, using a separate DB connection + bitstream.setDeleted(true); + bitstream.setInternalId(sInternalId); + if (syncEnabled) { + bitstream.setStoreNumber(SYNCHRONIZED_STORES_NUMBER); + } else { + bitstream.setStoreNumber(assetstore); + } + bitstreamService.update(context, bitstream); + + Map wantedMetadata = new HashMap(); + wantedMetadata.put("size_bytes", null); + wantedMetadata.put("checksum", null); + wantedMetadata.put("checksum_algorithm", null); + + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); + if (MapUtils.isEmpty(receivedMetadata)) { + String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; + log.error(message); + throw new IOException(message); + } else { + if (receivedMetadata.containsKey("checksum_algorithm")) { + bitstream.setChecksumAlgorithm(receivedMetadata.get("checksum_algorithm").toString()); + } + + if (receivedMetadata.containsKey("checksum")) { + bitstream.setChecksum(receivedMetadata.get("checksum").toString()); + } + + if (receivedMetadata.containsKey("size_bytes")) { + bitstream.setSizeBytes(Long.valueOf(receivedMetadata.get("size_bytes").toString())); + } + } + + bitstream.setDeleted(false); + bitstreamService.update(context, bitstream); + + UUID bitstreamId = bitstream.getID(); + if (log.isDebugEnabled()) { + log.debug("Registered bitstream " + bitstreamId + " at location " + bitstreamPath); + } + return bitstreamId; + } + + @Override + public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { + Map wantedMetadata = new HashMap(); + wantedMetadata.put("checksum", null); + wantedMetadata.put("checksum_algorithm", null); + + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + return receivedMetadata; + } + + @Override + public InputStream retrieve(Context context, Bitstream bitstream) + throws SQLException, IOException { + int storeNumber = this.whichStoreNumber(bitstream); + return this.getStore(storeNumber).get(bitstream); + } + + @Override + public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { + Context context = new Context(Context.Mode.BATCH_EDIT); + int commitCounter = 0; + + try { + context.turnOffAuthorisationSystem(); + + List storage = bitstreamService.findDeletedBitstreams(context); + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + Map wantedMetadata = new HashMap(); + wantedMetadata.put("size_bytes", null); + wantedMetadata.put("modified", null); + + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + + // This is a small chance that this is a file which is + // being stored -- get it next time. + if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } + + if (deleteDbRecords) { + log.debug("deleting db record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams + } + + + // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(storeNumber).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } + } + + // Make sure to commit our outstanding work every 100 + // iterations. Otherwise you risk losing the entire transaction + // if we hit an exception, which isn't useful at all for large + // amounts of bitstreams. + commitCounter++; + if (commitCounter % 100 == 0) { + context.dispatchEvents(); + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); + } + + context.uncacheEntity(bitstream); + } + + System.out.print("Committing changes to the database..."); + context.complete(); + System.out.println(" Done!"); + } catch (SQLException | IOException sqle) { + // Aborting will leave the DB objects around, even if the + // bitstreams are deleted. This is OK; deleting them next + // time around will be a no-op. + if (verbose) { + System.err.println("Error: " + sqle.getMessage()); + } + context.abort(); + throw sqle; + } finally { + context.restoreAuthSystemState(); + } + } + + @Nullable + @Override + public Long getLastModified(Bitstream bitstream) throws IOException { + Map attrs = new HashMap(); + attrs.put("modified", null); + int storeNumber = this.whichStoreNumber(bitstream); + attrs = this.getStore(storeNumber).about(bitstream, attrs); + if (attrs == null || !attrs.containsKey("modified")) { + return null; + } + return Long.valueOf(attrs.get("modified").toString()); + } + + /** + * Decide which store number should be used for the given bitstream. + * If the bitstream is synchronized (stored in to S3 and local), then the static store number is used. + * Otherwise, the bitstream's store number is used. + * + * @param bitstream bitstream + * @return store number + */ + public int whichStoreNumber(Bitstream bitstream) { + if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { + return getIncoming(); + } else { + return bitstream.getStoreNumber(); + } + } + +} diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java new file mode 100644 index 000000000000..cae46a512a56 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncS3BitStoreService.java @@ -0,0 +1,101 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +import com.amazonaws.AmazonClientException; +import com.amazonaws.services.s3.transfer.Upload; +import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.dspace.content.Bitstream; +import org.dspace.services.ConfigurationService; +import org.springframework.beans.factory.annotation.Autowired; + +/** + * Override of the S3BitStoreService to store all the data also in the local assetstore. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class SyncS3BitStoreService extends S3BitStoreService { + + /** + * log4j log + */ + private static final Logger log = LogManager.getLogger(SyncS3BitStoreService.class); + private boolean syncEnabled = false; + + @Autowired(required = true) + DSBitStoreService dsBitStoreService; + + @Autowired(required = true) + ConfigurationService configurationService; + + public SyncS3BitStoreService() { + super(); + } + + public void init() throws IOException { + super.init(); + syncEnabled = configurationService.getBooleanProperty("sync.storage.service.enabled", false); + } + + @Override + public void put(Bitstream bitstream, InputStream in) throws IOException { + String key = getFullKey(bitstream.getInternalId()); + //Copy istream to temp file, and send the file, with some metadata + File scratchFile = File.createTempFile(bitstream.getInternalId(), "s3bs"); + try { + FileUtils.copyInputStreamToFile(in, scratchFile); + long contentLength = scratchFile.length(); + // The ETag may or may not be and MD5 digest of the object data. + // Therefore, we precalculate before uploading + String localChecksum = org.dspace.curate.Utils.checksum(scratchFile, CSA); + + Upload upload = tm.upload(getBucketName(), key, scratchFile); + + upload.waitForUploadResult(); + + bitstream.setSizeBytes(contentLength); + bitstream.setChecksum(localChecksum); + bitstream.setChecksumAlgorithm(CSA); + + if (syncEnabled) { + // Upload file into local assetstore + File localFile = dsBitStoreService.getFile(bitstream); + FileUtils.copyFile(scratchFile, localFile); + } + } catch (AmazonClientException | IOException | InterruptedException e) { + log.error("put(" + bitstream.getInternalId() + ", is)", e); + throw new IOException(e); + } finally { + if (!scratchFile.delete()) { + scratchFile.deleteOnExit(); + } + } + } + + @Override + public void remove(Bitstream bitstream) throws IOException { + String key = getFullKey(bitstream.getInternalId()); + try { + // Remove file from S3 + s3Service.deleteObject(getBucketName(), key); + if (syncEnabled) { + // Remove file from local assetstore + dsBitStoreService.remove(bitstream); + } + } catch (AmazonClientException e) { + log.error("remove(" + key + ")", e); + throw new IOException(e); + } + } +} diff --git a/dspace-api/src/test/data/dspaceFolder/config/local.cfg b/dspace-api/src/test/data/dspaceFolder/config/local.cfg index de6caf8880f7..ce3a6ccce07d 100644 --- a/dspace-api/src/test/data/dspaceFolder/config/local.cfg +++ b/dspace-api/src/test/data/dspaceFolder/config/local.cfg @@ -277,3 +277,6 @@ handle.canonical.prefix = ${dspace.ui.url}/handle/ ### File preview ### # File preview is enabled by default file.preview.enabled = true + +### Storage service ### +sync.storage.service.enabled = false diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java index bb5544b3592c..e5d967afc8a4 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamConverter.java @@ -45,6 +45,7 @@ public BitstreamRest convert(org.dspace.content.Bitstream obj, Projection projec checksum.setValue(obj.getChecksum()); b.setCheckSum(checksum); b.setSizeBytes(obj.getSizeBytes()); + b.setStoreNumber(obj.getStoreNumber()); return b; } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java index 8e9efc2680b7..232d96b044a0 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java @@ -46,6 +46,8 @@ public class BitstreamRest extends DSpaceObjectRest { @JsonProperty(access = Access.READ_ONLY) private Integer sequenceId; + private int storeNumber; + public String getBundleName() { return bundleName; } @@ -78,6 +80,14 @@ public void setSequenceId(Integer sequenceId) { this.sequenceId = sequenceId; } + public int getStoreNumber() { + return storeNumber; + } + + public void setStoreNumber(int storeNumber) { + this.storeNumber = storeNumber; + } + @Override public String getCategory() { return CATEGORY; diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index fe7fd16fbea0..478a22b9bbae 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -242,3 +242,7 @@ shibboleth.name.conversion.outputEncoding = UTF-8 ### File preview ### # File preview is enabled by default file.preview.enabled = false + +### Storage service ### +# Synchronization is NOT enabled by default +sync.storage.service.enabled = true diff --git a/dspace/config/spring/api/bitstore.xml b/dspace/config/spring/api/bitstore.xml index 1cf7d8f68a3c..f02edcbc0807 100644 --- a/dspace/config/spring/api/bitstore.xml +++ b/dspace/config/spring/api/bitstore.xml @@ -3,7 +3,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd" default-lazy-init="true"> - + @@ -17,7 +17,7 @@ - + From 51d7a45e72cb3e36db1a0c1a862fc77449465886 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Fri, 22 Dec 2023 09:10:11 +0100 Subject: [PATCH 17/19] ufal/be-s3-checker-sync (#486) * Fixed UserCheck - There was created a wrong select * Fixed ChecksumCheck - the session was closed but there were some requests do the database then --- .../content/dao/impl/CollectionDAOImpl.java | 2 +- .../java/org/dspace/health/ChecksumCheck.java | 46 ++++++++++--------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/CollectionDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/CollectionDAOImpl.java index c0ef6ea42fce..bf89205c6093 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/CollectionDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/CollectionDAOImpl.java @@ -159,7 +159,7 @@ public List findAuthorizedByGroup(Context context, EPerson ePerson, @Override public List findCollectionsWithSubscribers(Context context) throws SQLException { - return list(createQuery(context, "SELECT DISTINCT col FROM Subscription s join s.collection col")); + return list(createQuery(context, "SELECT DISTINCT col FROM Subscription s join s.dSpaceObject col")); } @Override diff --git a/dspace-api/src/main/java/org/dspace/health/ChecksumCheck.java b/dspace-api/src/main/java/org/dspace/health/ChecksumCheck.java index 80e19aef70e5..f1003b4076a6 100644 --- a/dspace-api/src/main/java/org/dspace/health/ChecksumCheck.java +++ b/dspace-api/src/main/java/org/dspace/health/ChecksumCheck.java @@ -18,6 +18,7 @@ import org.dspace.checker.ChecksumResultsCollector; import org.dspace.checker.MostRecentChecksum; import org.dspace.checker.SimpleDispatcher; +import org.dspace.content.Bitstream; import org.dspace.core.Context; /** @@ -40,8 +41,30 @@ public String run(ReportInfo ri) { checker.setReportVerbose(true); try { checker.process(); + if (collector.arr.size() > 0) { + ret = String.format("Checksum performed on [%d] items:\n", + collector.arr.size()); + int ok_items = 0; + for (MostRecentChecksum bi : collector.arr) { + if (!ChecksumResultCode.CHECKSUM_MATCH.equals(bi + .getChecksumResult().getResultCode())) { + Bitstream reloadedBitstream = context.reloadEntity(bi.getBitstream()); + ret += String + .format("md5 checksum FAILED (%s): %s id: %s bitstream-id: %s\n was: %s\n is: %s\n", + bi.getChecksumResult(), reloadedBitstream.getName(), + reloadedBitstream.getInternalId(), reloadedBitstream.getID(), + bi.getExpectedChecksum(), + bi.getCurrentChecksum()); + } else { + ok_items++; + } + } + + ret += String.format("checksum OK for [%d] items\n", ok_items); + } context.complete(); context = null; + return ret; } catch (SQLException e) { error(e); } finally { @@ -49,28 +72,7 @@ public String run(ReportInfo ri) { context.abort(); } } - - if (collector.arr.size() > 0) { - ret = String.format("Checksum performed on [%d] items:\n", - collector.arr.size()); - int ok_items = 0; - for (MostRecentChecksum bi : collector.arr) { - if (!ChecksumResultCode.CHECKSUM_MATCH.equals(bi - .getChecksumResult().getResultCode())) { - ret += String - .format("md5 checksum FAILED (%s): %s id: %s bitstream-id: %s\n was: %s\n is: %s\n", - bi.getChecksumResult(), bi.getBitstream().getName(), - bi.getBitstream().getInternalId(), bi.getBitstream().getID(), - bi.getExpectedChecksum(), - bi.getCurrentChecksum()); - } else { - ok_items++; - } - } - - ret += String.format("checksum OK for [%d] items\n", ok_items); - } - return ret; + return ret; } } From faf9e4222a72a96a7ad5577c53d26e29cc530d65 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 3 Jan 2024 08:13:53 +0100 Subject: [PATCH 18/19] internal/load-version-from-file (#488) * Load version from the specific file and show content in the root endpoint. * Fixed checkstyle issue * DO not log anything if the VERSION file does not exist. Added VERSION file path into cfg property. * Append every line into final String. --- .../app/rest/converter/RootConverter.java | 39 +++++++++++++++++++ .../org/dspace/app/rest/model/RootRest.java | 12 ++++++ dspace/config/VERSION_D.txt | 0 dspace/config/clarin-dspace.cfg | 4 ++ 4 files changed, 55 insertions(+) create mode 100644 dspace/config/VERSION_D.txt diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/RootConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/RootConverter.java index 61f18a5b3c9c..fc8e996e9cd4 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/RootConverter.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/RootConverter.java @@ -9,8 +9,15 @@ import static org.dspace.app.util.Util.getSourceVersion; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; + +import org.apache.commons.lang3.StringUtils; import org.dspace.app.rest.model.RootRest; import org.dspace.services.ConfigurationService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @@ -19,6 +26,7 @@ */ @Component public class RootConverter { + private static final Logger log = LoggerFactory.getLogger(RootConverter.class); @Autowired private ConfigurationService configurationService; @@ -29,6 +37,37 @@ public RootRest convert() { rootRest.setDspaceUI(configurationService.getProperty("dspace.ui.url")); rootRest.setDspaceServer(configurationService.getProperty("dspace.server.url")); rootRest.setDspaceVersion("DSpace " + getSourceVersion()); + rootRest.setBuildVersion(getBuildVersion()); return rootRest; } + + /** + * Read the build version from the `build.version.file.path` property + * + * @return content of the version file + */ + private String getBuildVersion() { + String bVersionFilePath = configurationService.getProperty("build.version.file.path"); + + if (StringUtils.isBlank(bVersionFilePath)) { + return "Unknown"; + } + + StringBuilder buildVersion = new StringBuilder(); + try { + FileReader fileReader = new FileReader(bVersionFilePath); + BufferedReader bufferedReader = new BufferedReader(fileReader); + + String line; + // Read each line from the file until the end of the file is reached + while ((line = bufferedReader.readLine()) != null) { + buildVersion.append(line); + } + + } catch (IOException e) { + // Empty - do not log anything + } + + return buildVersion.toString(); + } } diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RootRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RootRest.java index cef8965601ca..b83859898ca0 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RootRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/RootRest.java @@ -22,6 +22,8 @@ public class RootRest extends RestAddressableModel { private String dspaceServer; private String dspaceVersion; + private String buildVersion; + public String getCategory() { return CATEGORY; } @@ -67,6 +69,14 @@ public void setDspaceVersion(String dspaceVersion) { this.dspaceVersion = dspaceVersion; } + public String getBuildVersion() { + return buildVersion; + } + + public void setBuildVersion(String buildVersion) { + this.buildVersion = buildVersion; + } + @Override public boolean equals(Object object) { return (object instanceof RootRest && @@ -76,6 +86,7 @@ public boolean equals(Object object) { .append(this.getDspaceUI(), ((RootRest) object).getDspaceUI()) .append(this.getDspaceName(), ((RootRest) object).getDspaceName()) .append(this.getDspaceServer(), ((RootRest) object).getDspaceServer()) + .append(this.getBuildVersion(), ((RootRest) object).getBuildVersion()) .isEquals()); } @@ -88,6 +99,7 @@ public int hashCode() { .append(this.getDspaceName()) .append(this.getDspaceUI()) .append(this.getDspaceServer()) + .append(this.getBuildVersion()) .toHashCode(); } } diff --git a/dspace/config/VERSION_D.txt b/dspace/config/VERSION_D.txt new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/dspace/config/clarin-dspace.cfg b/dspace/config/clarin-dspace.cfg index 478a22b9bbae..754d6422c96a 100644 --- a/dspace/config/clarin-dspace.cfg +++ b/dspace/config/clarin-dspace.cfg @@ -246,3 +246,7 @@ file.preview.enabled = false ### Storage service ### # Synchronization is NOT enabled by default sync.storage.service.enabled = true + + +### The build version is stored in the specific file ### +build.version.file.path = dspace/config/VERSION_D.txt From bf4f503724006a6615d5dcac3e336f12f4f9b524 Mon Sep 17 00:00:00 2001 From: milanmajchrak <90026355+milanmajchrak@users.noreply.github.com> Date: Wed, 3 Jan 2024 08:51:37 +0100 Subject: [PATCH 19/19] ufal/be-s3-checksum-fix (#487) * Cherry picked fix S3 optimalization fixes from Vanilla * Cherry picked adding of pagination on cleanup * Updated Sync services according to S3 optimization fix * The checksum value is compared between S3 and local assetstore and the new result is inserted to the database. * Added docs and refactoring * The checksum values are exposed by REST API * Fixed failing tests --------- Co-authored-by: Tim Donohue Co-authored-by: Luca Giamminonni --- dspace-api/pom.xml | 24 + .../org/dspace/checker/CheckerCommand.java | 49 +- .../checker/ChecksumHistoryServiceImpl.java | 3 +- .../dspace/checker/ChecksumResultCode.java | 3 +- .../checker/SimpleReporterServiceImpl.java | 2 + .../dao/impl/MostRecentChecksumDAOImpl.java | 4 +- .../dspace/content/BitstreamServiceImpl.java | 4 +- .../content/ClarinBitstreamServiceImpl.java | 19 +- .../org/dspace/content/dao/BitstreamDAO.java | 2 +- .../content/dao/impl/BitstreamDAOImpl.java | 5 +- .../content/service/BitstreamService.java | 2 +- .../src/main/java/org/dspace/core/Email.java | 6 +- .../storage/bitstore/BaseBitStoreService.java | 32 +- .../storage/bitstore/BitStoreService.java | 5 +- .../bitstore/BitstreamStorageServiceImpl.java | 159 +++--- .../storage/bitstore/DSBitStoreService.java | 13 +- .../DeleteOnCloseFileInputStream.java | 42 ++ .../storage/bitstore/S3BitStoreService.java | 155 +++--- .../SyncBitstreamStorageServiceImpl.java | 207 +++++--- .../factory/StorageServiceFactory.java | 3 + .../factory/StorageServiceFactoryImpl.java | 9 + .../service/BitstreamStorageService.java | 2 +- ...07.28__Upgrade_to_Lindat_Clarin_schema.sql | 9 +- .../storage/bitstore/S3BitStoreServiceIT.java | 434 ++++++++++++++++ .../bitstore/S3BitStoreServiceTest.java | 481 ------------------ .../converter/BitstreamChecksumConverter.java | 35 ++ .../app/rest/model/BitstreamChecksum.java | 46 ++ .../app/rest/model/BitstreamChecksumRest.java | 66 +++ .../dspace/app/rest/model/BitstreamRest.java | 5 + .../hateoas/BitstreamChecksumResource.java | 26 + .../BitstreamCheckSumLinkRepository.java | 104 ++++ .../app/rest/BitstreamRestControllerIT.java | 47 ++ .../app/rest/matcher/BitstreamMatcher.java | 6 +- 33 files changed, 1245 insertions(+), 764 deletions(-) create mode 100644 dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java create mode 100644 dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java delete mode 100644 dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java create mode 100644 dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java diff --git a/dspace-api/pom.xml b/dspace-api/pom.xml index c21c348c29b7..9d75342bd231 100644 --- a/dspace-api/pom.xml +++ b/dspace-api/pom.xml @@ -844,6 +844,24 @@ + + + io.findify + s3mock_2.13 + 0.2.6 + test + + + com.amazonawsl + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-s3 + + + + @@ -907,6 +925,12 @@ swagger-core 1.6.2 + + org.scala-lang + scala-library + 2.13.2 + test + diff --git a/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java b/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java index 6b16d51bfe1e..ba503d83eb4f 100644 --- a/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java +++ b/dspace-api/src/main/java/org/dspace/checker/CheckerCommand.java @@ -7,10 +7,13 @@ */ package org.dspace.checker; +import static org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl.SYNCHRONIZED_STORES_NUMBER; + import java.io.IOException; import java.sql.SQLException; import java.util.Date; import java.util.Map; +import java.util.Objects; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.Logger; @@ -20,8 +23,8 @@ import org.dspace.checker.service.MostRecentChecksumService; import org.dspace.content.Bitstream; import org.dspace.core.Context; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.factory.StorageServiceFactory; -import org.dspace.storage.bitstore.service.BitstreamStorageService; /** *

@@ -55,7 +58,7 @@ public final class CheckerCommand { * Checksum history Data access object */ private ChecksumHistoryService checksumHistoryService = null; - private BitstreamStorageService bitstreamStorageService = null; + private SyncBitstreamStorageServiceImpl bitstreamStorageService = null; private ChecksumResultService checksumResultService = null; /** @@ -86,7 +89,7 @@ public final class CheckerCommand { public CheckerCommand(Context context) { checksumService = CheckerServiceFactory.getInstance().getMostRecentChecksumService(); checksumHistoryService = CheckerServiceFactory.getInstance().getChecksumHistoryService(); - bitstreamStorageService = StorageServiceFactory.getInstance().getBitstreamStorageService(); + bitstreamStorageService = StorageServiceFactory.getInstance().getSyncBitstreamStorageService(); checksumResultService = CheckerServiceFactory.getInstance().getChecksumResultService(); this.context = context; } @@ -245,7 +248,9 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException { info.setProcessStartDate(new Date()); try { - Map checksumMap = bitstreamStorageService.computeChecksum(context, info.getBitstream()); + // 1. DB - Store not match + Bitstream bitstream = info.getBitstream(); + Map checksumMap = bitstreamStorageService.computeChecksum(context, bitstream); if (MapUtils.isNotEmpty(checksumMap)) { info.setBitstreamFound(true); if (checksumMap.containsKey("checksum")) { @@ -255,10 +260,42 @@ protected void processBitstream(MostRecentChecksum info) throws SQLException { if (checksumMap.containsKey("checksum_algorithm")) { info.setChecksumAlgorithm(checksumMap.get("checksum_algorithm").toString()); } + + // compare new checksum to previous checksum + info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum())); + + } else { + info.setCurrentChecksum(""); + info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND)); + info.setToBeProcessed(false); + } + + // 2. Store1 - Synchronized store 2 not match + // Check checksum of synchronized store + if (bitstream.getStoreNumber() != SYNCHRONIZED_STORES_NUMBER) { + return; + } + if (Objects.equals(ChecksumResultCode.CHECKSUM_NO_MATCH, info.getChecksumResult().getResultCode())) { + return; + } + + Map syncStoreChecksumMap = + bitstreamStorageService.computeChecksumSpecStore(context, bitstream, + bitstreamStorageService.getSynchronizedStoreNumber(bitstream)); + if (MapUtils.isNotEmpty(syncStoreChecksumMap)) { + String syncStoreChecksum = ""; + if (checksumMap.containsKey("checksum")) { + syncStoreChecksum = syncStoreChecksumMap.get("checksum").toString(); + } + // compare new checksum to previous checksum + ChecksumResult checksumResult = compareChecksums(info.getCurrentChecksum(), syncStoreChecksum); + // Do not override result with synchronization info if the checksums are not matching between + // DB and store + if (!Objects.equals(checksumResult.getResultCode(), ChecksumResultCode.CHECKSUM_NO_MATCH)) { + info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.CHECKSUM_SYNC_NO_MATCH)); + } } - // compare new checksum to previous checksum - info.setChecksumResult(compareChecksums(info.getExpectedChecksum(), info.getCurrentChecksum())); } catch (IOException e) { // bitstream located, but file missing from asset store info.setChecksumResult(getChecksumResultByCode(ChecksumResultCode.BITSTREAM_NOT_FOUND)); diff --git a/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java b/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java index f8d6560e9246..f7b05d4de9d3 100644 --- a/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/ChecksumHistoryServiceImpl.java @@ -74,7 +74,8 @@ public void addHistory(Context context, MostRecentChecksum mostRecentChecksum) t if (mostRecentChecksum.getBitstream().isDeleted()) { checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.BITSTREAM_MARKED_DELETED); } else { - checksumResult = checksumResultService.findByCode(context, ChecksumResultCode.CHECKSUM_MATCH); + checksumResult = checksumResultService.findByCode(context, + mostRecentChecksum.getChecksumResult().getResultCode()); } checksumHistory.setResult(checksumResult); diff --git a/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java b/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java index a0b532144290..a24127bb5371 100644 --- a/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java +++ b/dspace-api/src/main/java/org/dspace/checker/ChecksumResultCode.java @@ -24,5 +24,6 @@ public enum ChecksumResultCode { CHECKSUM_MATCH, CHECKSUM_NO_MATCH, CHECKSUM_PREV_NOT_FOUND, - CHECKSUM_ALGORITHM_INVALID + CHECKSUM_ALGORITHM_INVALID, + CHECKSUM_SYNC_NO_MATCH } diff --git a/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java b/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java index 26c102e1e78b..ddefb28e1b57 100644 --- a/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/SimpleReporterServiceImpl.java @@ -152,6 +152,7 @@ public int getBitstreamNotFoundReport(Context context, Date startDate, Date endD osw.write("\n"); osw.write(msg("bitstream-not-found-report")); + osw.write(" "); osw.write(applyDateFormatShort(startDate)); osw.write(" "); osw.write(msg("date-range-to")); @@ -230,6 +231,7 @@ public int getUncheckedBitstreamsReport(Context context, OutputStreamWriter osw) osw.write("\n"); osw.write(msg("unchecked-bitstream-report")); + osw.write(" "); osw.write(applyDateFormatShort(new Date())); osw.write("\n\n\n"); diff --git a/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java b/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java index 66ce666b9d6d..a31e02cbab4a 100644 --- a/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/checker/dao/impl/MostRecentChecksumDAOImpl.java @@ -92,8 +92,8 @@ public List findByResultTypeInDateRange(Context context, Dat criteriaQuery.where(criteriaBuilder.and( criteriaBuilder.equal(mostRecentResult.get(ChecksumResult_.resultCode), resultCode), criteriaBuilder.lessThanOrEqualTo( - mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate), - criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate) + mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), endDate), + criteriaBuilder.greaterThan(mostRecentChecksumRoot.get(MostRecentChecksum_.processStartDate), startDate) ) ); List orderList = new LinkedList<>(); diff --git a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java index 4bde1cabb936..73b09015cd10 100644 --- a/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/BitstreamServiceImpl.java @@ -338,8 +338,8 @@ public void updateLastModified(Context context, Bitstream bitstream) { } @Override - public List findDeletedBitstreams(Context context) throws SQLException { - return bitstreamDAO.findDeletedBitstreams(context); + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException { + return bitstreamDAO.findDeletedBitstreams(context, limit, offset); } @Override diff --git a/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java b/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java index 3c63ada2a763..1d4af3f7abdb 100644 --- a/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/ClarinBitstreamServiceImpl.java @@ -9,7 +9,7 @@ import java.io.IOException; import java.sql.SQLException; -import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Objects; @@ -25,7 +25,7 @@ import org.dspace.core.Constants; import org.dspace.core.Context; import org.dspace.event.Event; -import org.dspace.storage.bitstore.DSBitStoreService; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.annotation.Autowired; @@ -48,7 +48,7 @@ public class ClarinBitstreamServiceImpl implements ClarinBitstreamService { private static final String CSA = "MD5"; @Autowired - private DSBitStoreService storeService; + private SyncBitstreamStorageServiceImpl syncBitstreamStorageService; @Autowired protected BitstreamDAO bitstreamDAO; @Autowired @@ -99,13 +99,12 @@ public boolean validation(Context context, Bitstream bitstream) } //get file from assetstore based on internal_id //recalculate check fields - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - Map checksumMap = storeService.about(bitstream, wantedMetadata); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = syncBitstreamStorageService + .getStore(syncBitstreamStorageService.whichStoreNumber(bitstream)) + .about(bitstream, wantedMetadata); //check that new calculated values match the expected values - if (MapUtils.isEmpty(checksumMap) || !valid(bitstream, checksumMap)) { + if (MapUtils.isEmpty(receivedMetadata) || !valid(bitstream, receivedMetadata)) { //an error occurred - expected and calculated values do not match //delete all created data bitstreamService.delete(context, bitstream); @@ -126,7 +125,7 @@ public boolean validation(Context context, Bitstream bitstream) * @param checksumMap calculated values * @return bitstream values match with expected values */ - private boolean valid(Bitstream bitstream, Map checksumMap) { + private boolean valid(Bitstream bitstream, Map checksumMap) { if (!checksumMap.containsKey("checksum") || !checksumMap.containsKey("checksum_algorithm") || !checksumMap.containsKey("size_bytes")) { log.error("Cannot validate of bitstream with id: " + bitstream.getID() + diff --git a/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java b/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java index c1ef92313127..0d7afaa3cd73 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/BitstreamDAO.java @@ -29,7 +29,7 @@ public interface BitstreamDAO extends DSpaceObjectLegacySupportDAO { public Iterator findAll(Context context, int limit, int offset) throws SQLException; - public List findDeletedBitstreams(Context context) throws SQLException; + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException; public List findDuplicateInternalIdentifier(Context context, Bitstream bitstream) throws SQLException; diff --git a/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java b/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java index 02e3509c311a..d6d77fe7f0c7 100644 --- a/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java +++ b/dspace-api/src/main/java/org/dspace/content/dao/impl/BitstreamDAOImpl.java @@ -41,13 +41,14 @@ protected BitstreamDAOImpl() { } @Override - public List findDeletedBitstreams(Context context) throws SQLException { + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException { CriteriaBuilder criteriaBuilder = getCriteriaBuilder(context); CriteriaQuery criteriaQuery = getCriteriaQuery(criteriaBuilder, Bitstream.class); Root bitstreamRoot = criteriaQuery.from(Bitstream.class); criteriaQuery.select(bitstreamRoot); + criteriaQuery.orderBy(criteriaBuilder.desc(bitstreamRoot.get(Bitstream_.ID))); criteriaQuery.where(criteriaBuilder.equal(bitstreamRoot.get(Bitstream_.deleted), true)); - return list(context, criteriaQuery, false, Bitstream.class, -1, -1); + return list(context, criteriaQuery, false, Bitstream.class, limit, offset); } diff --git a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java index 4621c95e7c89..8effabf28435 100644 --- a/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java +++ b/dspace-api/src/main/java/org/dspace/content/service/BitstreamService.java @@ -183,7 +183,7 @@ public InputStream retrieve(Context context, Bitstream bitstream) * @return a list of all bitstreams that have been "deleted" * @throws SQLException if database error */ - public List findDeletedBitstreams(Context context) throws SQLException; + public List findDeletedBitstreams(Context context, int limit, int offset) throws SQLException; /** diff --git a/dspace-api/src/main/java/org/dspace/core/Email.java b/dspace-api/src/main/java/org/dspace/core/Email.java index 64da629bcc53..998d934c9558 100644 --- a/dspace-api/src/main/java/org/dspace/core/Email.java +++ b/dspace-api/src/main/java/org/dspace/core/Email.java @@ -314,6 +314,8 @@ public void send() throws MessagingException, IOException { message.addRecipient(Message.RecipientType.TO, new InternetAddress( i.next())); } + // Get headers defined by the template. + String[] templateHeaders = config.getArrayProperty("mail.message.headers"); // Format the mail message body VelocityEngine templateEngine = new VelocityEngine(); @@ -334,6 +336,7 @@ public void send() throws MessagingException, IOException { repo.putStringResource(contentName, content); // Turn content into a template. template = templateEngine.getTemplate(contentName); + templateHeaders = new String[] {}; } StringWriter writer = new StringWriter(); @@ -351,8 +354,7 @@ public void send() throws MessagingException, IOException { message.setSentDate(date); message.setFrom(new InternetAddress(from)); - // Get headers defined by the template. - for (String headerName : config.getArrayProperty("mail.message.headers")) { + for (String headerName : templateHeaders) { String headerValue = (String) vctx.get(headerName); if ("subject".equalsIgnoreCase(headerName)) { if (null != headerValue) { diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java index 209c1e21e74d..5b367d7a8136 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BaseBitStoreService.java @@ -14,6 +14,8 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.lang3.StringUtils; @@ -153,22 +155,24 @@ protected boolean isLonger(String internalId, int endIndex) { * Retrieves a map of useful metadata about the File (size, checksum, modified) * * @param file The File to analyze - * @param attrs The map where we are storing values + * @param attrs The list of requested metadata values * @return Map of updated metadatas / attrs * @throws IOException */ - public Map about(File file, Map attrs) throws IOException { + public Map about(File file, List attrs) throws IOException { + + Map metadata = new HashMap(); + try { if (file != null && file.exists()) { - this.putValueIfExistsKey(attrs, SIZE_BYTES, file.length()); - if (attrs.containsKey(CHECKSUM)) { - attrs.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file))); - attrs.put(CHECKSUM_ALGORITHM, CSA); + this.putValueIfExistsKey(attrs, metadata, SIZE_BYTES, file.length()); + if (attrs.contains(CHECKSUM)) { + metadata.put(CHECKSUM, Utils.toHex(this.generateChecksumFrom(file))); + metadata.put(CHECKSUM_ALGORITHM, CSA); } - this.putValueIfExistsKey(attrs, MODIFIED, String.valueOf(file.lastModified())); - return attrs; + this.putValueIfExistsKey(attrs, metadata, MODIFIED, String.valueOf(file.lastModified())); } - return null; + return metadata; } catch (Exception e) { log.error("about( FilePath: " + file.getAbsolutePath() + ", Map: " + attrs.toString() + ")", e); throw new IOException(e); @@ -204,13 +208,9 @@ private byte[] generateChecksumFrom(FileInputStream fis) throws IOException, NoS } } - protected void putValueIfExistsKey(Map attrs, String key, Object value) { - this.putEntryIfExistsKey(attrs, key, Map.entry(key, value)); - } - - protected void putEntryIfExistsKey(Map attrs, String key, Map.Entry entry) { - if (attrs.containsKey(key)) { - attrs.put(entry.getKey(), entry.getValue()); + protected void putValueIfExistsKey(List attrs, Map metadata, String key, Object value) { + if (attrs.contains(key)) { + metadata.put(key, value); } } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java index b6ac540c5047..5a02ad1d5617 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitStoreService.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.io.InputStream; +import java.util.List; import java.util.Map; import org.dspace.content.Bitstream; @@ -62,13 +63,13 @@ public interface BitStoreService { * Obtain technical metadata about an asset in the asset store. * * @param bitstream The bitstream to describe - * @param attrs A Map whose keys consist of desired metadata fields + * @param attrs A List of desired metadata fields * @return attrs * A Map with key/value pairs of desired metadata * If file not found, then return null * @throws java.io.IOException If a problem occurs while obtaining metadata */ - public Map about(Bitstream bitstream, Map attrs) throws IOException; + public Map about(Bitstream bitstream, List attrs) throws IOException; /** * Remove an asset from the asset store. diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java index b8a1a2e96ad4..3539496b1466 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/BitstreamStorageServiceImpl.java @@ -17,6 +17,7 @@ import java.util.UUID; import javax.annotation.Nullable; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -165,12 +166,9 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, bitstream.setStoreNumber(assetstore); bitstreamService.update(context, bitstream); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); - Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); if (MapUtils.isEmpty(receivedMetadata)) { String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; log.error(message); @@ -200,13 +198,8 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, } @Override - public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { - Map wantedMetadata = new HashMap(); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); - return receivedMetadata; + public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { + return this.getStore(bitstream.getStoreNumber()).about(bitstream, List.of("checksum", "checksum_algorithm")); } @Override @@ -224,25 +217,61 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); - int commitCounter = 0; + + int offset = 0; + int limit = 100; + + int cleanedBitstreamCount = 0; + + int deletedBitstreamCount = bitstreamService.countDeletedBitstreams(context); + System.out.println("Found " + deletedBitstreamCount + " deleted bistream to cleanup"); try { context.turnOffAuthorisationSystem(); - List storage = bitstreamService.findDeletedBitstreams(context); - for (Bitstream bitstream : storage) { - UUID bid = bitstream.getID(); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("modified", null); - Map receivedMetadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, wantedMetadata); + while (cleanedBitstreamCount < deletedBitstreamCount) { + + List storage = bitstreamService.findDeletedBitstreams(context, limit, offset); + + if (CollectionUtils.isEmpty(storage)) { + break; + } + + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + List wantedMetadata = List.of("size_bytes", "modified"); + Map receivedMetadata = this.getStore(bitstream.getStoreNumber()) + .about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + // This is a small chance that this is a file which is + // being stored -- get it next time. + if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } - // Make sure entries which do not exist are removed - if (MapUtils.isEmpty(receivedMetadata)) { - log.debug("bitstore.about is empty, so file is not present"); if (deleteDbRecords) { - log.debug("deleting record"); + log.debug("deleting db record"); if (verbose) { System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); } @@ -252,64 +281,42 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio } bitstreamService.expunge(context, bitstream); } - context.uncacheEntity(bitstream); - continue; - } - // This is a small chance that this is a file which is - // being stored -- get it next time. - if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { - log.debug("file is recent"); - context.uncacheEntity(bitstream); - continue; - } - - if (deleteDbRecords) { - log.debug("deleting db record"); - if (verbose) { - System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams } - checksumHistoryService.deleteByBitstream(context, bitstream); - if (verbose) { - System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + + + // Since versioning allows for multiple bitstreams, check if the internal + // identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(bitstream.getStoreNumber()).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } } - bitstreamService.expunge(context, bitstream); - } - if (isRegisteredBitstream(bitstream.getInternalId())) { context.uncacheEntity(bitstream); - continue; // do not delete registered bitstreams } + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); - // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on - // another place - if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(bitstream.getStoreNumber()).remove(bitstream); - - String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); - if (log.isDebugEnabled()) { - log.debug(message); - } - if (verbose) { - System.out.println(message); - } - } + cleanedBitstreamCount = cleanedBitstreamCount + storage.size(); - // Make sure to commit our outstanding work every 100 - // iterations. Otherwise you risk losing the entire transaction - // if we hit an exception, which isn't useful at all for large - // amounts of bitstreams. - commitCounter++; - if (commitCounter % 100 == 0) { - context.dispatchEvents(); - // Commit actual changes to DB after dispatch events - System.out.print("Performing incremental commit to the database..."); - context.commit(); - System.out.println(" Incremental commit done!"); + if (!deleteDbRecords) { + offset = offset + limit; } - context.uncacheEntity(bitstream); } System.out.print("Committing changes to the database..."); @@ -332,13 +339,11 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio @Nullable @Override public Long getLastModified(Bitstream bitstream) throws IOException { - Map attrs = new HashMap(); - attrs.put("modified", null); - attrs = this.getStore(bitstream.getStoreNumber()).about(bitstream, attrs); - if (attrs == null || !attrs.containsKey("modified")) { + Map metadata = this.getStore(bitstream.getStoreNumber()).about(bitstream, List.of("modified")); + if (metadata == null || !metadata.containsKey("modified")) { return null; } - return Long.valueOf(attrs.get("modified").toString()); + return Long.valueOf(metadata.get("modified").toString()); } /** @@ -483,7 +488,7 @@ protected boolean isRecent(Long lastModified) { return (now - lastModified) < (1 * 60 * 1000); } - protected BitStoreService getStore(int position) throws IOException { + public BitStoreService getStore(int position) throws IOException { BitStoreService bitStoreService = this.stores.get(position); if (!bitStoreService.isInitialized()) { bitStoreService.init(); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java index 1fdf1e84e115..6fef7365e482 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DSBitStoreService.java @@ -15,6 +15,7 @@ import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.List; import java.util.Map; import org.apache.logging.log4j.Logger; @@ -126,13 +127,13 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { /** * Obtain technical metadata about an asset in the asset store. * - * @param bitstream The asset to describe - * @param attrs A Map whose keys consist of desired metadata fields - * @return attrs - * A Map with key/value pairs of desired metadata - * @throws java.io.IOException If a problem occurs while obtaining metadata + * @param bitstream The asset to describe + * @param attrs A List of desired metadata fields + * @return attrs A Map with key/value pairs of desired metadata + * @throws java.io.IOException If a problem occurs while obtaining + * metadata */ - public Map about(Bitstream bitstream, Map attrs) throws IOException { + public Map about(Bitstream bitstream, List attrs) throws IOException { try { // potentially expensive, since it may calculate the checksum File file = getFile(bitstream); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java new file mode 100644 index 000000000000..62c24544eeac --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/DeleteOnCloseFileInputStream.java @@ -0,0 +1,42 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * When inputstream closes, then delete the file + * http://stackoverflow.com/a/4694155/368581 + */ +public class DeleteOnCloseFileInputStream extends FileInputStream { + + private File file; + + public DeleteOnCloseFileInputStream(String fileName) throws FileNotFoundException { + this(new File(fileName)); + } + + public DeleteOnCloseFileInputStream(File file) throws FileNotFoundException { + super(file); + this.file = file; + } + + public void close() throws IOException { + try { + super.close(); + } finally { + if (file != null) { + file.delete(); + file = null; + } + } + } +} diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java index 1ad4c33f8213..be20f019fefd 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/S3BitStoreService.java @@ -7,10 +7,19 @@ */ package org.dspace.storage.bitstore; +import static java.lang.String.valueOf; + import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.security.DigestInputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Supplier; import javax.validation.constraints.NotNull; @@ -22,12 +31,11 @@ import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.amazonaws.services.s3.model.AmazonS3Exception; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.transfer.Download; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.TransferManagerBuilder; import com.amazonaws.services.s3.transfer.Upload; @@ -37,7 +45,7 @@ import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.io.FileUtils; +import org.apache.commons.io.output.NullOutputStream; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpStatus; import org.apache.logging.log4j.LogManager; @@ -73,7 +81,7 @@ public class S3BitStoreService extends BaseBitStoreService { /** * Checksum algorithm */ - protected static final String CSA = "MD5"; + static final String CSA = "MD5"; // These settings control the way an identifier is hashed into // directory and file names @@ -160,13 +168,11 @@ public S3BitStoreService() {} /** * This constructor is used for test purpose. - * In this way is possible to use a mocked instance of AmazonS3 * - * @param s3Service mocked AmazonS3 service + * @param s3Service AmazonS3 service */ - protected S3BitStoreService(AmazonS3 s3Service, TransferManager tm) { + protected S3BitStoreService(AmazonS3 s3Service) { this.s3Service = s3Service; - this.tm = tm; } @Override @@ -283,9 +289,16 @@ public InputStream get(Bitstream bitstream) throws IOException { key = key.substring(REGISTERED_FLAG.length()); } try { - S3Object object = s3Service.getObject(new GetObjectRequest(bucketName, key)); - return (object != null) ? object.getObjectContent() : null; - } catch (AmazonClientException e) { + File tempFile = File.createTempFile("s3-disk-copy-" + UUID.randomUUID(), "temp"); + tempFile.deleteOnExit(); + + GetObjectRequest getObjectRequest = new GetObjectRequest(bucketName, key); + + Download download = tm.download(getObjectRequest, tempFile); + download.waitForCompletion(); + + return new DeleteOnCloseFileInputStream(tempFile); + } catch (AmazonClientException | InterruptedException e) { log.error("get(" + key + ")", e); throw new IOException(e); } @@ -307,24 +320,30 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { String key = getFullKey(bitstream.getInternalId()); //Copy istream to temp file, and send the file, with some metadata File scratchFile = File.createTempFile(bitstream.getInternalId(), "s3bs"); - try { - FileUtils.copyInputStreamToFile(in, scratchFile); - long contentLength = scratchFile.length(); - // The ETag may or may not be and MD5 digest of the object data. - // Therefore, we precalculate before uploading - String localChecksum = org.dspace.curate.Utils.checksum(scratchFile, CSA); + try ( + FileOutputStream fos = new FileOutputStream(scratchFile); + // Read through a digest input stream that will work out the MD5 + DigestInputStream dis = new DigestInputStream(in, MessageDigest.getInstance(CSA)); + ) { + Utils.bufferedCopy(dis, fos); + in.close(); Upload upload = tm.upload(bucketName, key, scratchFile); upload.waitForUploadResult(); - bitstream.setSizeBytes(contentLength); - bitstream.setChecksum(localChecksum); + bitstream.setSizeBytes(scratchFile.length()); + // we cannot use the S3 ETAG here as it could be not a MD5 in case of multipart upload (large files) or if + // the bucket is encrypted + bitstream.setChecksum(Utils.toHex(dis.getMessageDigest().digest())); bitstream.setChecksumAlgorithm(CSA); } catch (AmazonClientException | IOException | InterruptedException e) { log.error("put(" + bitstream.getInternalId() + ", is)", e); throw new IOException(e); + } catch (NoSuchAlgorithmException nsae) { + // Should never happen + log.warn("Caught NoSuchAlgorithmException", nsae); } finally { if (!scratchFile.delete()) { scratchFile.deleteOnExit(); @@ -339,61 +358,56 @@ public void put(Bitstream bitstream, InputStream in) throws IOException { * (Does not use getContentMD5, as that is 128-bit MD5 digest calculated on caller's side) * * @param bitstream The asset to describe - * @param attrs A Map whose keys consist of desired metadata fields + * @param attrs A List of desired metadata fields * @return attrs * A Map with key/value pairs of desired metadata * If file not found, then return null * @throws java.io.IOException If a problem occurs while obtaining metadata */ @Override - public Map about(Bitstream bitstream, Map attrs) throws IOException { + public Map about(Bitstream bitstream, List attrs) throws IOException { + String key = getFullKey(bitstream.getInternalId()); // If this is a registered bitstream, strip the -R prefix before retrieving if (isRegisteredBitstream(key)) { key = key.substring(REGISTERED_FLAG.length()); } + + Map metadata = new HashMap<>(); + try { + ObjectMetadata objectMetadata = s3Service.getObjectMetadata(bucketName, key); if (objectMetadata != null) { - return this.about(objectMetadata, attrs); + putValueIfExistsKey(attrs, metadata, "size_bytes", objectMetadata.getContentLength()); + putValueIfExistsKey(attrs, metadata, "modified", valueOf(objectMetadata.getLastModified().getTime())); + } + + putValueIfExistsKey(attrs, metadata, "checksum_algorithm", CSA); + + if (attrs.contains("checksum")) { + try (InputStream in = get(bitstream); + DigestInputStream dis = new DigestInputStream(in, MessageDigest.getInstance(CSA)) + ) { + Utils.copy(dis, NullOutputStream.NULL_OUTPUT_STREAM); + byte[] md5Digest = dis.getMessageDigest().digest(); + metadata.put("checksum", Utils.toHex(md5Digest)); + } catch (NoSuchAlgorithmException nsae) { + // Should never happen + log.warn("Caught NoSuchAlgorithmException", nsae); + } } + + return metadata; } catch (AmazonS3Exception e) { if (e.getStatusCode() == HttpStatus.SC_NOT_FOUND) { - return null; + return metadata; } } catch (AmazonClientException e) { log.error("about(" + key + ", attrs)", e); throw new IOException(e); } - return null; - } - - /** - * Populates map values by checking key existence - *
- * Adds technical metadata about an asset in the asset store, like: - *

    - *
  • size_bytes
  • - *
  • checksum
  • - *
  • checksum_algorithm
  • - *
  • modified
  • - *
- * - * @param objectMetadata containing technical data - * @param attrs map with keys populated - * @return Map of enriched attrs with values - */ - public Map about(ObjectMetadata objectMetadata, Map attrs) { - if (objectMetadata != null) { - this.putValueIfExistsKey(attrs, SIZE_BYTES, objectMetadata.getContentLength()); - - // put CHECKSUM_ALGORITHM if exists CHECKSUM - this.putValueIfExistsKey(attrs, CHECKSUM, objectMetadata.getETag()); - this.putEntryIfExistsKey(attrs, CHECKSUM, Map.entry(CHECKSUM_ALGORITHM, CSA)); - - this.putValueIfExistsKey(attrs, MODIFIED, String.valueOf(objectMetadata.getLastModified().getTime())); - } - return attrs; + return metadata; } /** @@ -478,22 +492,6 @@ public void setAwsAccessKey(String awsAccessKey) { this.awsAccessKey = awsAccessKey; } - @Autowired(required = true) - public void setPathStyleAccessEnabled(boolean pathStyleAccessEnabled) { - this.pathStyleAccessEnabled = pathStyleAccessEnabled; - } - - public boolean getPathStyleAccessEnabled() { - return this.pathStyleAccessEnabled; - } - public void setEndpoint(String endpoint) { - this.endpoint = endpoint; - } - - public String getEndpoint() { - return this.endpoint; - } - public String getAwsSecretKey() { return awsSecretKey; } @@ -536,6 +534,22 @@ public void setUseRelativePath(boolean useRelativePath) { this.useRelativePath = useRelativePath; } + public String getEndpoint() { + return endpoint; + } + + public void setEndpoint(String endpoint) { + this.endpoint = endpoint; + } + + public boolean getPathStyleAccessEnabled() { + return pathStyleAccessEnabled; + } + + public void setPathStyleAccessEnabled(boolean pathStyleAccessEnabled) { + this.pathStyleAccessEnabled = pathStyleAccessEnabled; + } + /** * Contains a command-line testing tool. Expects arguments: * -a accessKey -s secretKey -f assetFileName @@ -573,13 +587,14 @@ public static void main(String[] args) throws Exception { String accessKey = command.getOptionValue("a"); String secretKey = command.getOptionValue("s"); - String assetFile = command.getOptionValue("f"); S3BitStoreService store = new S3BitStoreService(); AWSCredentials awsCredentials = new BasicAWSCredentials(accessKey, secretKey); - store.s3Service = new AmazonS3Client(awsCredentials); + store.s3Service = AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(awsCredentials)) + .build(); //Todo configurable region Region usEast1 = Region.getRegion(Regions.US_EAST_1); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java index e48487955209..d2266f02d75c 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/SyncBitstreamStorageServiceImpl.java @@ -10,13 +10,12 @@ import java.io.IOException; import java.io.InputStream; import java.sql.SQLException; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.UUID; import javax.annotation.Nullable; +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections4.MapUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -41,7 +40,7 @@ public class SyncBitstreamStorageServiceImpl extends BitstreamStorageServiceImpl private static final Logger log = LogManager.getLogger(); private boolean syncEnabled = false; - private static final int SYNCHRONIZED_STORES_NUMBER = 77; + public static final int SYNCHRONIZED_STORES_NUMBER = 77; @Autowired ConfigurationService configurationService; @@ -136,12 +135,9 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, } bitstreamService.update(context, bitstream); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); + List wantedMetadata = List.of("size_bytes", "checksum", "checksum_algorithm"); + Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); - Map receivedMetadata = this.getStore(assetstore).about(bitstream, wantedMetadata); if (MapUtils.isEmpty(receivedMetadata)) { String message = "Not able to register bitstream:" + bitstream.getID() + " at path: " + bitstreamPath; log.error(message); @@ -172,13 +168,20 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, @Override public Map computeChecksum(Context context, Bitstream bitstream) throws IOException { - Map wantedMetadata = new HashMap(); - wantedMetadata.put("checksum", null); - wantedMetadata.put("checksum_algorithm", null); - int storeNumber = this.whichStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); - return receivedMetadata; + return this.getStore(storeNumber).about(bitstream, List.of("checksum", "checksum_algorithm")); + } + + /** + * Compute the checksum of a bitstream in a specific store. + * @param context DSpace Context object + * @param bitstream Bitstream to compute checksum for + * @param storeNumber Store number to compute checksum for + * @return Map with checksum and checksum algorithm + * @throws IOException if IO error + */ + public Map computeChecksumSpecStore(Context context, Bitstream bitstream, int storeNumber) throws IOException { + return this.getStore(storeNumber).about(bitstream, List.of("checksum", "checksum_algorithm")); } @Override @@ -191,27 +194,62 @@ public InputStream retrieve(Context context, Bitstream bitstream) @Override public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException { Context context = new Context(Context.Mode.BATCH_EDIT); - int commitCounter = 0; + + int offset = 0; + int limit = 100; + + int cleanedBitstreamCount = 0; + + int deletedBitstreamCount = bitstreamService.countDeletedBitstreams(context); + System.out.println("Found " + deletedBitstreamCount + " deleted bistream to cleanup"); try { context.turnOffAuthorisationSystem(); - List storage = bitstreamService.findDeletedBitstreams(context); - for (Bitstream bitstream : storage) { - UUID bid = bitstream.getID(); - Map wantedMetadata = new HashMap(); - wantedMetadata.put("size_bytes", null); - wantedMetadata.put("modified", null); + while (cleanedBitstreamCount < deletedBitstreamCount) { + + List storage = bitstreamService.findDeletedBitstreams(context, limit, offset); - int storeNumber = this.whichStoreNumber(bitstream); - Map receivedMetadata = this.getStore(storeNumber).about(bitstream, wantedMetadata); + if (CollectionUtils.isEmpty(storage)) { + break; + } + + for (Bitstream bitstream : storage) { + UUID bid = bitstream.getID(); + List wantedMetadata = List.of("size_bytes", "modified"); + int storeNumber = this.whichStoreNumber(bitstream); + Map receivedMetadata = this.getStore(storeNumber) + .about(bitstream, wantedMetadata); + + + // Make sure entries which do not exist are removed + if (MapUtils.isEmpty(receivedMetadata)) { + log.debug("bitstore.about is empty, so file is not present"); + if (deleteDbRecords) { + log.debug("deleting record"); + if (verbose) { + System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + } + checksumHistoryService.deleteByBitstream(context, bitstream); + if (verbose) { + System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + } + bitstreamService.expunge(context, bitstream); + } + context.uncacheEntity(bitstream); + continue; + } + // This is a small chance that this is a file which is + // being stored -- get it next time. + if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { + log.debug("file is recent"); + context.uncacheEntity(bitstream); + continue; + } - // Make sure entries which do not exist are removed - if (MapUtils.isEmpty(receivedMetadata)) { - log.debug("bitstore.about is empty, so file is not present"); if (deleteDbRecords) { - log.debug("deleting record"); + log.debug("deleting db record"); if (verbose) { System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); } @@ -221,64 +259,42 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio } bitstreamService.expunge(context, bitstream); } - context.uncacheEntity(bitstream); - continue; - } - - // This is a small chance that this is a file which is - // being stored -- get it next time. - if (isRecent(Long.valueOf(receivedMetadata.get("modified").toString()))) { - log.debug("file is recent"); - context.uncacheEntity(bitstream); - continue; - } - if (deleteDbRecords) { - log.debug("deleting db record"); - if (verbose) { - System.out.println(" - Deleting bitstream information (ID: " + bid + ")"); + if (isRegisteredBitstream(bitstream.getInternalId())) { + context.uncacheEntity(bitstream); + continue; // do not delete registered bitstreams } - checksumHistoryService.deleteByBitstream(context, bitstream); - if (verbose) { - System.out.println(" - Deleting bitstream record from database (ID: " + bid + ")"); + + + // Since versioning allows for multiple bitstreams, check if the internal + // identifier isn't used on + // another place + if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { + this.getStore(storeNumber).remove(bitstream); + + String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); + if (log.isDebugEnabled()) { + log.debug(message); + } + if (verbose) { + System.out.println(message); + } } - bitstreamService.expunge(context, bitstream); - } - if (isRegisteredBitstream(bitstream.getInternalId())) { context.uncacheEntity(bitstream); - continue; // do not delete registered bitstreams } + // Commit actual changes to DB after dispatch events + System.out.print("Performing incremental commit to the database..."); + context.commit(); + System.out.println(" Incremental commit done!"); - // Since versioning allows for multiple bitstreams, check if the internal identifier isn't used on - // another place - if (bitstreamService.findDuplicateInternalIdentifier(context, bitstream).isEmpty()) { - this.getStore(storeNumber).remove(bitstream); - - String message = ("Deleted bitstreamID " + bid + ", internalID " + bitstream.getInternalId()); - if (log.isDebugEnabled()) { - log.debug(message); - } - if (verbose) { - System.out.println(message); - } - } + cleanedBitstreamCount = cleanedBitstreamCount + storage.size(); - // Make sure to commit our outstanding work every 100 - // iterations. Otherwise you risk losing the entire transaction - // if we hit an exception, which isn't useful at all for large - // amounts of bitstreams. - commitCounter++; - if (commitCounter % 100 == 0) { - context.dispatchEvents(); - // Commit actual changes to DB after dispatch events - System.out.print("Performing incremental commit to the database..."); - context.commit(); - System.out.println(" Incremental commit done!"); + if (!deleteDbRecords) { + offset = offset + limit; } - context.uncacheEntity(bitstream); } System.out.print("Committing changes to the database..."); @@ -301,14 +317,12 @@ public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLExceptio @Nullable @Override public Long getLastModified(Bitstream bitstream) throws IOException { - Map attrs = new HashMap(); - attrs.put("modified", null); int storeNumber = this.whichStoreNumber(bitstream); - attrs = this.getStore(storeNumber).about(bitstream, attrs); - if (attrs == null || !attrs.containsKey("modified")) { + Map metadata = this.getStore(storeNumber).about(bitstream, List.of("modified")); + if (metadata == null || !metadata.containsKey("modified")) { return null; } - return Long.valueOf(attrs.get("modified").toString()); + return Long.valueOf(metadata.get("modified").toString()); } /** @@ -320,11 +334,44 @@ public Long getLastModified(Bitstream bitstream) throws IOException { * @return store number */ public int whichStoreNumber(Bitstream bitstream) { - if (Objects.equals(bitstream.getStoreNumber(), SYNCHRONIZED_STORES_NUMBER)) { + if (isBitstreamStoreSynchronized(bitstream)) { return getIncoming(); } else { return bitstream.getStoreNumber(); } } + /** + * Check if the bitstream is synchronized (stored in more stores) + * The bitstream is synchronized if it has the static store number. + * + * @param bitstream to check if it is synchronized + * @return true if the bitstream is synchronized + */ + public boolean isBitstreamStoreSynchronized(Bitstream bitstream) { + return bitstream.getStoreNumber() == SYNCHRONIZED_STORES_NUMBER; + } + + + /** + * Get the store number where the bitstream is synchronized. It is not active (incoming) store. + * + * @param bitstream to get the synchronized store number + * @return store number + */ + public int getSynchronizedStoreNumber(Bitstream bitstream) { + int storeNumber = -1; + if (!isBitstreamStoreSynchronized(bitstream)) { + storeNumber = bitstream.getStoreNumber(); + } + + for (Map.Entry storeEntry : getStores().entrySet()) { + if (storeEntry.getKey() == SYNCHRONIZED_STORES_NUMBER || storeEntry.getKey() == getIncoming()) { + continue; + } + storeNumber = storeEntry.getKey(); + } + return storeNumber; + } + } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java index be954557fc44..e0ce37802e70 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactory.java @@ -8,6 +8,7 @@ package org.dspace.storage.bitstore.factory; import org.dspace.services.factory.DSpaceServicesFactory; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; /** @@ -20,6 +21,8 @@ public abstract class StorageServiceFactory { public abstract BitstreamStorageService getBitstreamStorageService(); + public abstract SyncBitstreamStorageServiceImpl getSyncBitstreamStorageService(); + public static StorageServiceFactory getInstance() { return DSpaceServicesFactory.getInstance().getServiceManager() .getServiceByName("storageServiceFactory", StorageServiceFactory.class); diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java index 0dc67223d830..a44a4fbae73e 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/factory/StorageServiceFactoryImpl.java @@ -7,6 +7,7 @@ */ package org.dspace.storage.bitstore.factory; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; import org.dspace.storage.bitstore.service.BitstreamStorageService; import org.springframework.beans.factory.annotation.Autowired; @@ -20,9 +21,17 @@ public class StorageServiceFactoryImpl extends StorageServiceFactory { @Autowired(required = true) private BitstreamStorageService bitstreamStorageService; + @Autowired(required = true) + private SyncBitstreamStorageServiceImpl syncBitstreamStorageService; + @Override public BitstreamStorageService getBitstreamStorageService() { return bitstreamStorageService; } + + @Override + public SyncBitstreamStorageServiceImpl getSyncBitstreamStorageService() { + return syncBitstreamStorageService; + } } diff --git a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java index 209ef5d16be6..7f5ed8f9129f 100644 --- a/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java +++ b/dspace-api/src/main/java/org/dspace/storage/bitstore/service/BitstreamStorageService.java @@ -102,7 +102,7 @@ public interface BitstreamStorageService { public UUID register(Context context, Bitstream bitstream, int assetstore, String bitstreamPath) throws SQLException, IOException, AuthorizeException; - public Map computeChecksum(Context context, Bitstream bitstream) throws IOException; + public Map computeChecksum(Context context, Bitstream bitstream) throws IOException; /** * Does the internal_id column in the bitstream row indicate the bitstream diff --git a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql index b336010262f0..86e90bad2ad9 100644 --- a/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql +++ b/dspace-api/src/main/resources/org/dspace/storage/rdbms/sqlmigration/postgres/V7.2_2022.07.28__Upgrade_to_Lindat_Clarin_schema.sql @@ -487,4 +487,11 @@ ALTER COLUMN element TYPE character varying(128); ALTER TABLE eperson ADD welcome_info varchar(30); -ALTER TABLE eperson ADD can_edit_submission_metadata BOOL; \ No newline at end of file +ALTER TABLE eperson ADD can_edit_submission_metadata BOOL; + +insert into checksum_results +values +( + 'CHECKSUM_SYNC_NO_MATCH', + 'The checksum value from S3 is not matching the checksum value from the local file system' +); diff --git a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java new file mode 100644 index 000000000000..7aae1cf2719c --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceIT.java @@ -0,0 +1,434 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.storage.bitstore; + +import static com.amazonaws.regions.Regions.DEFAULT_REGION; +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.dspace.storage.bitstore.S3BitStoreService.CSA; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasEntry; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.startsWith; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.Bucket; +import com.amazonaws.services.s3.model.ObjectMetadata; +import io.findify.s3mock.S3Mock; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.app.matcher.LambdaMatcher; +import org.dspace.authorize.AuthorizeException; +import org.dspace.builder.BitstreamBuilder; +import org.dspace.builder.CollectionBuilder; +import org.dspace.builder.CommunityBuilder; +import org.dspace.builder.ItemBuilder; +import org.dspace.content.Bitstream; +import org.dspace.content.Collection; +import org.dspace.content.Item; +import org.dspace.core.Utils; +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + + +/** + * @author Luca Giamminonni (luca.giamminonni at 4science.com) + */ +public class S3BitStoreServiceIT extends AbstractIntegrationTestWithDatabase { + + private static final String DEFAULT_BUCKET_NAME = "dspace-asset-localhost"; + + private S3BitStoreService s3BitStoreService; + + private AmazonS3 amazonS3Client; + + private S3Mock s3Mock; + + private Collection collection; + + private File s3Directory; + + @Before + public void setup() throws Exception { + + s3Directory = new File(System.getProperty("java.io.tmpdir"), "s3"); + + s3Mock = S3Mock.create(8001, s3Directory.getAbsolutePath()); + s3Mock.start(); + + amazonS3Client = createAmazonS3Client(); + + s3BitStoreService = new S3BitStoreService(amazonS3Client); + + context.turnOffAuthorisationSystem(); + + parentCommunity = CommunityBuilder.createCommunity(context) + .build(); + + collection = CollectionBuilder.createCollection(context, parentCommunity) + .build(); + + context.restoreAuthSystemState(); + } + + @After + public void cleanUp() throws IOException { + FileUtils.deleteDirectory(s3Directory); + s3Mock.shutdown(); + } + + @Test + public void testBitstreamPutAndGetWithAlreadyPresentBucket() throws IOException { + + String bucketName = "testbucket"; + + amazonS3Client.createBucket(bucketName); + + s3BitStoreService.setBucketName(bucketName); + s3BitStoreService.init(); + + assertThat(amazonS3Client.listBuckets(), contains(bucketNamed(bucketName))); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + assertThat(bitstream.getSizeBytes(), is((long) content.length())); + assertThat(bitstream.getChecksum(), is(expectedChecksum)); + assertThat(bitstream.getChecksumAlgorithm(), is(CSA)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(bucketName, key); + assertThat(objectMetadata.getContentMD5(), is(expectedChecksum)); + + } + + @Test + public void testBitstreamPutAndGetWithoutSpecifingBucket() throws IOException { + + s3BitStoreService.init(); + + assertThat(s3BitStoreService.getBucketName(), is(DEFAULT_BUCKET_NAME)); + + assertThat(amazonS3Client.listBuckets(), contains(bucketNamed(DEFAULT_BUCKET_NAME))); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + assertThat(bitstream.getSizeBytes(), is((long) content.length())); + assertThat(bitstream.getChecksum(), is(expectedChecksum)); + assertThat(bitstream.getChecksumAlgorithm(), is(CSA)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(DEFAULT_BUCKET_NAME, key); + assertThat(objectMetadata.getContentMD5(), is(expectedChecksum)); + + } + + @Test + public void testBitstreamPutAndGetWithSubFolder() throws IOException { + + s3BitStoreService.setSubfolder("test/DSpace7/"); + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + InputStream inputStream = s3BitStoreService.get(bitstream); + assertThat(IOUtils.toString(inputStream, UTF_8), is(content)); + + String key = s3BitStoreService.getFullKey(bitstream.getInternalId()); + assertThat(key, startsWith("test/DSpace7/")); + + ObjectMetadata objectMetadata = amazonS3Client.getObjectMetadata(DEFAULT_BUCKET_NAME, key); + assertThat(objectMetadata, notNullValue()); + + } + + @Test + public void testBitstreamDeletion() throws IOException { + + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + assertThat(s3BitStoreService.get(bitstream), notNullValue()); + + s3BitStoreService.remove(bitstream); + + IOException exception = assertThrows(IOException.class, () -> s3BitStoreService.get(bitstream)); + assertThat(exception.getCause(), instanceOf(AmazonS3Exception.class)); + assertThat(((AmazonS3Exception) exception.getCause()).getStatusCode(), is(404)); + + } + + @Test + public void testAbout() throws IOException { + + s3BitStoreService.init(); + + context.turnOffAuthorisationSystem(); + String content = "Test bitstream content"; + Bitstream bitstream = createBitstream(content); + context.restoreAuthSystemState(); + + s3BitStoreService.put(bitstream, toInputStream(content)); + + Map about = s3BitStoreService.about(bitstream, List.of()); + assertThat(about.size(), is(0)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about.size(), is(1)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about.size(), is(2)); + + String expectedChecksum = Utils.toHex(generateChecksum(content)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified", "checksum")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about, hasEntry("checksum", expectedChecksum)); + assertThat(about.size(), is(3)); + + about = s3BitStoreService.about(bitstream, List.of("size_bytes", "modified", "checksum", "checksum_algorithm")); + assertThat(about, hasEntry("size_bytes", 22L)); + assertThat(about, hasEntry(is("modified"), notNullValue())); + assertThat(about, hasEntry("checksum", expectedChecksum)); + assertThat(about, hasEntry("checksum_algorithm", CSA)); + assertThat(about.size(), is(4)); + + } + + @Test + public void handleRegisteredIdentifierPrefixInS3() { + String trueBitStreamId = "012345"; + String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + trueBitStreamId; + // Should be detected as registered bitstream + assertTrue(this.s3BitStoreService.isRegisteredBitstream(registeredBitstreamId)); + } + + @Test + public void stripRegisteredBitstreamPrefixWhenCalculatingPath() { + // Set paths and IDs + String s3Path = "UNIQUE_S3_PATH/test/bitstream.pdf"; + String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + s3Path; + // Paths should be equal, since the getRelativePath method should strip the registered -R prefix + String relativeRegisteredPath = this.s3BitStoreService.getRelativePath(registeredBitstreamId); + assertEquals(s3Path, relativeRegisteredPath); + } + + @Test + public void givenBitStreamIdentifierLongerThanPossibleWhenIntermediatePathIsComputedThenIsSplittedAndTruncated() { + String path = "01234567890123456789"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenBitStreamIdentifierShorterThanAFolderLengthWhenIntermediatePathIsComputedThenIsSingleFolder() { + String path = "0"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "0" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenPartialBitStreamIdentifierWhenIntermediatePathIsComputedThenIsCompletlySplitted() { + String path = "01234"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "4" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenMaxLengthBitStreamIdentifierWhenIntermediatePathIsComputedThenIsSplittedAllAsSubfolder() { + String path = "012345"; + String computedPath = this.s3BitStoreService.getIntermediatePath(path); + String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; + assertThat(computedPath, equalTo(expectedPath)); + } + + @Test + public void givenBitStreamIdentifierWhenIntermediatePathIsComputedThenNotEndingDoubleSlash() throws IOException { + StringBuilder path = new StringBuilder("01"); + String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + int slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("2"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("3"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("4"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + + path.append("56789"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); + } + + @Test + public void givenBitStreamIdentidierWhenIntermediatePathIsComputedThenMustBeSplitted() throws IOException { + StringBuilder path = new StringBuilder("01"); + String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + int slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("2"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("3"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("4"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + + path.append("56789"); + computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); + slashes = computeSlashes(path.toString()); + assertThat(computedPath, Matchers.endsWith(File.separator)); + assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); + } + + @Test + public void givenBitStreamIdentifierWithSlashesWhenSanitizedThenSlashesMustBeRemoved() { + String sInternalId = new StringBuilder("01") + .append(File.separator) + .append("22") + .append(File.separator) + .append("33") + .append(File.separator) + .append("4455") + .toString(); + String computedPath = this.s3BitStoreService.sanitizeIdentifier(sInternalId); + assertThat(computedPath, Matchers.not(Matchers.startsWith(File.separator))); + assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator))); + assertThat(computedPath, Matchers.not(Matchers.containsString(File.separator))); + } + + private byte[] generateChecksum(String content) { + try { + MessageDigest m = MessageDigest.getInstance("MD5"); + m.update(content.getBytes()); + return m.digest(); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + private AmazonS3 createAmazonS3Client() { + return AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .withEndpointConfiguration(new EndpointConfiguration("http://127.0.0.1:8001", DEFAULT_REGION.getName())) + .build(); + } + + private Item createItem() { + return ItemBuilder.createItem(context, collection) + .withTitle("Test item") + .build(); + } + + private Bitstream createBitstream(String content) { + try { + return BitstreamBuilder + .createBitstream(context, createItem(), toInputStream(content)) + .build(); + } catch (SQLException | AuthorizeException | IOException e) { + throw new RuntimeException(e); + } + } + + private Matcher bucketNamed(String name) { + return LambdaMatcher.matches(bucket -> bucket.getName().equals(name)); + } + + private InputStream toInputStream(String content) { + return IOUtils.toInputStream(content, UTF_8); + } + + private int computeSlashes(String internalId) { + int minimum = internalId.length(); + int slashesPerLevel = minimum / S3BitStoreService.digitsPerLevel; + int odd = Math.min(1, minimum % S3BitStoreService.digitsPerLevel); + int slashes = slashesPerLevel + odd; + return Math.min(slashes, S3BitStoreService.directoryLevels); + } + +} diff --git a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java b/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java deleted file mode 100644 index 3a5141f2272c..000000000000 --- a/dspace-api/src/test/java/org/dspace/storage/bitstore/S3BitStoreServiceTest.java +++ /dev/null @@ -1,481 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace.storage.bitstore; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.isEmptyOrNullString; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThrows; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.startsWith; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.util.function.Supplier; -import java.util.regex.Pattern; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3Client; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.PutObjectRequest; -import com.amazonaws.services.s3.model.PutObjectResult; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; -import com.amazonaws.services.s3.transfer.TransferManager; -import com.amazonaws.services.s3.transfer.Upload; -import com.amazonaws.services.s3.transfer.model.UploadResult; -import org.apache.commons.io.FileUtils; -import org.dspace.AbstractUnitTest; -import org.dspace.content.Bitstream; -import org.dspace.curate.Utils; -import org.hamcrest.Matchers; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentMatchers; -import org.mockito.Mock; -import org.mockito.MockedStatic; -import org.mockito.Mockito; - - - - -/** - * @author Vincenzo Mecca (vins01-4science - vincenzo.mecca at 4science.com) - * - */ -public class S3BitStoreServiceTest extends AbstractUnitTest { - - private S3BitStoreService s3BitStoreService; - - @Mock - private AmazonS3Client s3Service; - - @Mock - private TransferManager tm; - - @Mock - private Bitstream bitstream; - - @Mock - private Bitstream externalBitstream; - - @Before - public void setUp() throws Exception { - this.s3BitStoreService = new S3BitStoreService(s3Service, tm); - } - - private Supplier mockedServiceSupplier() { - return () -> this.s3Service; - } - - @Test - public void givenBucketWhenInitThenUsesSameBucket() throws IOException { - String bucketName = "Bucket0"; - s3BitStoreService.setBucketName(bucketName); - when(this.s3Service.doesBucketExistV2(bucketName)).thenReturn(false); - - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - - this.s3BitStoreService.init(); - - verify(this.s3Service).doesBucketExistV2(bucketName); - verify(this.s3Service, Mockito.times(1)).createBucket(bucketName); - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenEmptyBucketWhenInitThenUsesDefaultBucket() throws IOException { - assertThat(s3BitStoreService.getBucketName(), isEmptyOrNullString()); - when(this.s3Service.doesBucketExistV2(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX))).thenReturn(false); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - - this.s3BitStoreService.init(); - - verify(this.s3Service, Mockito.times(1)).createBucket(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getBucketName(), Matchers.startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenAccessKeysWhenInitThenVerifiesCorrectBuilderCreation() throws IOException { - assertThat(s3BitStoreService.getAwsAccessKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsSecretKey(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getBucketName(), isEmptyOrNullString()); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - when(this.s3Service.doesBucketExistV2(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX))).thenReturn(false); - - final String awsAccessKey = "ACCESS_KEY"; - final String awsSecretKey = "SECRET_KEY"; - - this.s3BitStoreService.setAwsAccessKey(awsAccessKey); - this.s3BitStoreService.setAwsSecretKey(awsSecretKey); - - try (MockedStatic mockedS3BitStore = Mockito.mockStatic(S3BitStoreService.class)) { - mockedS3BitStore - .when(() -> - S3BitStoreService.amazonClientBuilderBy( - ArgumentMatchers.any(Regions.class), - ArgumentMatchers.argThat( - credentials -> - awsAccessKey.equals(credentials.getAWSAccessKeyId()) && - awsSecretKey.equals(credentials.getAWSSecretKey()) - ) - ) - ) - .thenReturn(this.mockedServiceSupplier()); - - this.s3BitStoreService.init(); - - mockedS3BitStore.verify( - () -> - S3BitStoreService.amazonClientBuilderBy( - ArgumentMatchers.any(Regions.class), - ArgumentMatchers.argThat( - credentials -> - awsAccessKey.equals(credentials.getAWSAccessKeyId()) && - awsSecretKey.equals(credentials.getAWSSecretKey()) - ) - ) - ); - } - - - verify(this.s3Service, Mockito.times(1)).createBucket(startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getBucketName(), Matchers.startsWith(S3BitStoreService.DEFAULT_BUCKET_PREFIX)); - assertThat(s3BitStoreService.getAwsAccessKey(), Matchers.equalTo(awsAccessKey)); - assertThat(s3BitStoreService.getAwsSecretKey(), Matchers.equalTo(awsSecretKey)); - assertThat(s3BitStoreService.getAwsRegionName(), isEmptyOrNullString()); - } - - @Test - public void givenBucketBitStreamIdInputStreamWhenRetrievingFromS3ThenUsesBucketBitStreamId() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - S3Object object = Mockito.mock(S3Object.class); - S3ObjectInputStream inputStream = Mockito.mock(S3ObjectInputStream.class); - when(object.getObjectContent()).thenReturn(inputStream); - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(object); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.equalTo(inputStream)); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.contentEquals(request.getBucketName()) && - bitStreamId.contentEquals(request.getKey()) - ) - ); - - } - - @Test - public void givenBucketBitStreamIdWhenNothingFoundOnS3ThenReturnsNull() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(null); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.nullValue()); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.contentEquals(request.getBucketName()) && - bitStreamId.contentEquals(request.getKey()) - ) - ); - - } - - @Test - public void givenSubFolderWhenRequestsItemFromS3ThenTheIdentifierShouldHaveProperPath() throws IOException { - String bucketName = "BucketTest"; - String bitStreamId = "012345"; - String subfolder = "/test/DSpace7/"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - this.s3BitStoreService.setSubfolder(subfolder); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - S3Object object = Mockito.mock(S3Object.class); - S3ObjectInputStream inputStream = Mockito.mock(S3ObjectInputStream.class); - when(object.getObjectContent()).thenReturn(inputStream); - when(this.s3Service.getObject(ArgumentMatchers.any(GetObjectRequest.class))).thenReturn(object); - - this.s3BitStoreService.init(); - assertThat(this.s3BitStoreService.get(bitstream), Matchers.equalTo(inputStream)); - - verify(this.s3Service).getObject( - ArgumentMatchers.argThat( - request -> - bucketName.equals(request.getBucketName()) && - request.getKey().startsWith(subfolder) && - request.getKey().contains(bitStreamId) && - !request.getKey().contains(File.separator + File.separator) - ) - ); - - } - - @Test - public void handleRegisteredIdentifierPrefixInS3() { - String trueBitStreamId = "012345"; - String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + trueBitStreamId; - // Should be detected as registered bitstream - assertTrue(this.s3BitStoreService.isRegisteredBitstream(registeredBitstreamId)); - } - - @Test - public void stripRegisteredBitstreamPrefixWhenCalculatingPath() { - // Set paths and IDs - String s3Path = "UNIQUE_S3_PATH/test/bitstream.pdf"; - String registeredBitstreamId = s3BitStoreService.REGISTERED_FLAG + s3Path; - // Paths should be equal, since the getRelativePath method should strip the registered -R prefix - String relativeRegisteredPath = this.s3BitStoreService.getRelativePath(registeredBitstreamId); - assertEquals(s3Path, relativeRegisteredPath); - } - - @Test - public void givenBitStreamIdentifierLongerThanPossibleWhenIntermediatePathIsComputedThenIsSplittedAndTruncated() { - String path = "01234567890123456789"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenBitStreamIdentifierShorterThanAFolderLengthWhenIntermediatePathIsComputedThenIsSingleFolder() { - String path = "0"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "0" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenPartialBitStreamIdentifierWhenIntermediatePathIsComputedThenIsCompletlySplitted() { - String path = "01234"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "4" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenMaxLengthBitStreamIdentifierWhenIntermediatePathIsComputedThenIsSplittedAllAsSubfolder() { - String path = "012345"; - String computedPath = this.s3BitStoreService.getIntermediatePath(path); - String expectedPath = "01" + File.separator + "23" + File.separator + "45" + File.separator; - assertThat(computedPath, equalTo(expectedPath)); - } - - @Test - public void givenBitStreamIdentifierWhenIntermediatePathIsComputedThenNotEndingDoubleSlash() throws IOException { - StringBuilder path = new StringBuilder("01"); - String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - int slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); - assertThat(computedPath.split(Pattern.quote(File.separator)).length, Matchers.equalTo(slashes)); - - path.append("2"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("3"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("4"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - - path.append("56789"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator + File.separator))); - } - - @Test - public void givenBitStreamIdentidierWhenIntermediatePathIsComputedThenMustBeSplitted() throws IOException { - StringBuilder path = new StringBuilder("01"); - String computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - int slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("2"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("3"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("4"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - - path.append("56789"); - computedPath = this.s3BitStoreService.getIntermediatePath(path.toString()); - slashes = computeSlashes(path.toString()); - assertThat(computedPath, Matchers.endsWith(File.separator)); -// assertThat(computedPath.split(File.separator).length, Matchers.equalTo(slashes)); - } - - @Test - public void givenBitStreamIdentifierWithSlashesWhenSanitizedThenSlashesMustBeRemoved() { - String sInternalId = new StringBuilder("01") - .append(File.separator) - .append("22") - .append(File.separator) - .append("33") - .append(File.separator) - .append("4455") - .toString(); - String computedPath = this.s3BitStoreService.sanitizeIdentifier(sInternalId); - assertThat(computedPath, Matchers.not(Matchers.startsWith(File.separator))); - assertThat(computedPath, Matchers.not(Matchers.endsWith(File.separator))); - assertThat(computedPath, Matchers.not(Matchers.containsString(File.separator))); - } - - @Test - public void givenBitStreamWhenRemoveThenCallS3DeleteMethod() throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - this.s3BitStoreService.init(); - this.s3BitStoreService.remove(bitstream); - - verify(this.s3Service, Mockito.times(1)).deleteObject(ArgumentMatchers.eq(bucketName), - ArgumentMatchers.eq(bitStreamId)); - - } - - @Test - public void givenBitStreamWhenPutThenCallS3PutMethodAndStoresInBitStream() throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - File file = Mockito.mock(File.class); - InputStream in = Mockito.mock(InputStream.class); - PutObjectResult putObjectResult = Mockito.mock(PutObjectResult.class); - Upload upload = Mockito.mock(Upload.class); - UploadResult uploadResult = Mockito.mock(UploadResult.class); - when(upload.waitForUploadResult()).thenReturn(uploadResult); - String mockedTag = "1a7771d5fdd7bfdfc84033c70b1ba555"; - when(file.length()).thenReturn(8L); - try (MockedStatic fileMock = Mockito.mockStatic(File.class)) { - try (MockedStatic fileUtilsMock = Mockito.mockStatic(FileUtils.class)) { - try (MockedStatic curateUtils = Mockito.mockStatic(Utils.class)) { - curateUtils.when(() -> Utils.checksum((File) ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(mockedTag); - - fileMock - .when(() -> File.createTempFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(file); - - when(this.tm.upload(ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(upload); - - this.s3BitStoreService.init(); - this.s3BitStoreService.put(bitstream, in); - } - } - - } - - verify(this.bitstream, Mockito.times(1)).setSizeBytes( - ArgumentMatchers.eq(8L) - ); - - verify(this.bitstream, Mockito.times(1)).setChecksum( - ArgumentMatchers.eq(mockedTag) - ); - - verify(this.tm, Mockito.times(1)).upload( - ArgumentMatchers.eq(bucketName), - ArgumentMatchers.eq(bitStreamId), - ArgumentMatchers.eq(file) - ); - - verify(file, Mockito.times(1)).delete(); - - } - - @Test - public void givenBitStreamWhenCallingPutFileCopyingThrowsIOExceptionPutThenFileIsRemovedAndStreamClosed() - throws Exception { - String bucketName = "BucketTest"; - String bitStreamId = "BitStreamId"; - this.s3BitStoreService.setBucketName(bucketName); - this.s3BitStoreService.setUseRelativePath(false); - when(bitstream.getInternalId()).thenReturn(bitStreamId); - - File file = Mockito.mock(File.class); - InputStream in = Mockito.mock(InputStream.class); - try (MockedStatic fileMock = Mockito.mockStatic(File.class)) { - try (MockedStatic fileUtilsMock = Mockito.mockStatic(FileUtils.class)) { - fileUtilsMock - .when(() -> FileUtils.copyInputStreamToFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenThrow(IOException.class); - fileMock - .when(() -> File.createTempFile(ArgumentMatchers.any(), ArgumentMatchers.any())) - .thenReturn(file); - - this.s3BitStoreService.init(); - assertThrows(IOException.class, () -> this.s3BitStoreService.put(bitstream, in)); - } - - } - - verify(this.bitstream, Mockito.never()).setSizeBytes(ArgumentMatchers.any(Long.class)); - - verify(this.bitstream, Mockito.never()).setChecksum(ArgumentMatchers.any(String.class)); - - verify(this.s3Service, Mockito.never()).putObject(ArgumentMatchers.any(PutObjectRequest.class)); - - verify(file, Mockito.times(1)).delete(); - - } - - private int computeSlashes(String internalId) { - int minimum = internalId.length(); - int slashesPerLevel = minimum / S3BitStoreService.digitsPerLevel; - int odd = Math.min(1, minimum % S3BitStoreService.digitsPerLevel); - int slashes = slashesPerLevel + odd; - return Math.min(slashes, S3BitStoreService.directoryLevels); - } - -} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java new file mode 100644 index 000000000000..fbf24f4f8e74 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/converter/BitstreamChecksumConverter.java @@ -0,0 +1,35 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.converter; + +import org.dspace.app.rest.model.BitstreamChecksum; +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.projection.Projection; +import org.springframework.stereotype.Component; + +/** + * Convert the BitstreamChecksum to appropriate REST data model + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@Component +public class BitstreamChecksumConverter implements DSpaceConverter { + @Override + public BitstreamChecksumRest convert(BitstreamChecksum modelObject, Projection projection) { + BitstreamChecksumRest bitstreamChecksumRest = new BitstreamChecksumRest(); + bitstreamChecksumRest.setActiveStore(modelObject.getActiveStore()); + bitstreamChecksumRest.setDatabaseChecksum(modelObject.getDatabaseChecksum()); + bitstreamChecksumRest.setSynchronizedStore(modelObject.getSynchronizedStore()); + return bitstreamChecksumRest; + } + + @Override + public Class getModelClass() { + return BitstreamChecksum.class; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java new file mode 100644 index 000000000000..3c3c494df134 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksum.java @@ -0,0 +1,46 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +/** + * This class handles the checksums of a bitstream (local, S3, database) + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class BitstreamChecksum { + CheckSumRest activeStore = null; + CheckSumRest synchronizedStore = null; + CheckSumRest databaseChecksum = null; + + public BitstreamChecksum() { + } + + public CheckSumRest getActiveStore() { + return activeStore; + } + + public void setActiveStore(CheckSumRest activeStore) { + this.activeStore = activeStore; + } + + public CheckSumRest getSynchronizedStore() { + return synchronizedStore; + } + + public void setSynchronizedStore(CheckSumRest synchronizedStore) { + this.synchronizedStore = synchronizedStore; + } + + public CheckSumRest getDatabaseChecksum() { + return databaseChecksum; + } + + public void setDatabaseChecksum(CheckSumRest databaseChecksum) { + this.databaseChecksum = databaseChecksum; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java new file mode 100644 index 000000000000..8195d90bbdaf --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamChecksumRest.java @@ -0,0 +1,66 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model; + +import com.fasterxml.jackson.annotation.JsonProperty; + +/** + * The BitstreamChecksum REST Resource. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +public class BitstreamChecksumRest extends RestAddressableModel implements RestModel { + private static final long serialVersionUID = -3415049466402327251L; + public static final String NAME = "bitstreamchecksum"; + CheckSumRest activeStore = null; + CheckSumRest synchronizedStore = null; + CheckSumRest databaseChecksum = null; + + public BitstreamChecksumRest() { + } + + @Override + @JsonProperty(access = JsonProperty.Access.READ_ONLY) + public String getType() { + return NAME; + } + + public CheckSumRest getActiveStore() { + return activeStore; + } + + public void setActiveStore(CheckSumRest activeStore) { + this.activeStore = activeStore; + } + + public CheckSumRest getSynchronizedStore() { + return synchronizedStore; + } + + public void setSynchronizedStore(CheckSumRest synchronizedStore) { + this.synchronizedStore = synchronizedStore; + } + + public CheckSumRest getDatabaseChecksum() { + return databaseChecksum; + } + + public void setDatabaseChecksum(CheckSumRest databaseChecksum) { + this.databaseChecksum = databaseChecksum; + } + + @Override + public String getCategory() { + return null; + } + + @Override + public Class getController() { + return null; + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java index 232d96b044a0..a1c3156a01a9 100644 --- a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/BitstreamRest.java @@ -27,6 +27,10 @@ @LinkRest( name = BitstreamRest.THUMBNAIL, method = "getThumbnail" + ), + @LinkRest( + name = BitstreamRest.CHECKSUM, + method = "getChecksum" ) }) public class BitstreamRest extends DSpaceObjectRest { @@ -37,6 +41,7 @@ public class BitstreamRest extends DSpaceObjectRest { public static final String BUNDLE = "bundle"; public static final String FORMAT = "format"; public static final String THUMBNAIL = "thumbnail"; + public static final String CHECKSUM = "checksum"; private String bundleName; diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java new file mode 100644 index 000000000000..a83255d98491 --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/model/hateoas/BitstreamChecksumResource.java @@ -0,0 +1,26 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.model.hateoas; + +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.model.hateoas.annotations.RelNameDSpaceResource; +import org.dspace.app.rest.utils.Utils; + +/** + * Bitstream Checksum Rest HAL Resource. The HAL Resource wraps the REST Resource + * adding support for the links and embedded resources + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@RelNameDSpaceResource(BitstreamChecksumRest.NAME) +public class BitstreamChecksumResource extends DSpaceResource { + + public BitstreamChecksumResource(BitstreamChecksumRest data, Utils utils) { + super(data, utils); + } +} diff --git a/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java new file mode 100644 index 000000000000..77ad383624ab --- /dev/null +++ b/dspace-server-webapp/src/main/java/org/dspace/app/rest/repository/BitstreamCheckSumLinkRepository.java @@ -0,0 +1,104 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.app.rest.repository; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Map; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nullable; +import javax.servlet.http.HttpServletRequest; + +import org.dspace.app.rest.model.BitstreamChecksum; +import org.dspace.app.rest.model.BitstreamChecksumRest; +import org.dspace.app.rest.model.BitstreamRest; +import org.dspace.app.rest.model.CheckSumRest; +import org.dspace.app.rest.projection.Projection; +import org.dspace.content.Bitstream; +import org.dspace.content.service.BitstreamService; +import org.dspace.core.Context; +import org.dspace.storage.bitstore.SyncBitstreamStorageServiceImpl; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.Pageable; +import org.springframework.data.rest.webmvc.ResourceNotFoundException; +import org.springframework.security.access.prepost.PreAuthorize; +import org.springframework.stereotype.Component; + +/** + * Link repository for "checksum" subresource of an individual bitstream. + * + * @author Milan Majchrak (milan.majchrak at dataquest.sk) + */ +@Component(BitstreamRest.CATEGORY + "." + BitstreamRest.NAME + "." + BitstreamRest.CHECKSUM) +public class BitstreamCheckSumLinkRepository extends AbstractDSpaceRestRepository implements LinkRestRepository { + + @Autowired + BitstreamService bitstreamService; + + @Autowired + SyncBitstreamStorageServiceImpl syncBitstreamStorageService; + + @PreAuthorize("hasPermission(#bitstreamId, 'BITSTREAM', 'READ')") + public BitstreamChecksumRest getChecksum(@Nullable HttpServletRequest request, + UUID bitstreamId, + @Nullable Pageable optionalPageable, + Projection projection) { + try { + Context context = obtainContext(); + Bitstream bitstream = bitstreamService.find(context, bitstreamId); + if (bitstream == null) { + throw new ResourceNotFoundException("No such bitstream: " + bitstreamId); + } + + CheckSumRest activeStoreChecksum = new CheckSumRest(); + CheckSumRest databaseChecksum = new CheckSumRest(); + CheckSumRest synchronizedStoreChecksum = new CheckSumRest(); + + // Get the checksum from the active store + composeChecksumRest(activeStoreChecksum, syncBitstreamStorageService.computeChecksum(context, bitstream)); + // Get the checksum from the database + databaseChecksum.setCheckSumAlgorithm(bitstream.getChecksumAlgorithm()); + databaseChecksum.setValue(bitstream.getChecksum()); + + if (syncBitstreamStorageService.isBitstreamStoreSynchronized(bitstream)) { + // Get the checksum from the synchronized store + composeChecksumRest(synchronizedStoreChecksum, + syncBitstreamStorageService.computeChecksumSpecStore(context, bitstream, + syncBitstreamStorageService.getSynchronizedStoreNumber(bitstream))); + } + + BitstreamChecksum bitstreamChecksum = new BitstreamChecksum(); + bitstreamChecksum.setActiveStore(activeStoreChecksum); + bitstreamChecksum.setDatabaseChecksum(databaseChecksum); + bitstreamChecksum.setSynchronizedStore(synchronizedStoreChecksum); + + return converter.toRest(bitstreamChecksum, projection); + } catch (SQLException e) { + throw new RuntimeException(e); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Compose the checksum rest object from the checksum map + */ + private void composeChecksumRest(CheckSumRest checksumRest, Map checksumMap) { + if (Objects.isNull(checksumMap)) { + return; + } + if (checksumMap.containsKey("checksum")) { + checksumRest.setValue(checksumMap.get("checksum").toString()); + } + + if (checksumMap.containsKey("checksum_algorithm")) { + checksumRest.setCheckSumAlgorithm(checksumMap.get("checksum_algorithm").toString()); + } + } +} diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java index 09dbdca505fe..b7ece99fed04 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/BitstreamRestControllerIT.java @@ -22,6 +22,7 @@ import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -1178,4 +1179,50 @@ public void closeInputStreamsDownloadWithCoverPage() throws Exception { Mockito.verify(inputStreamSpy, times(1)).close(); } + @Test + public void testChecksumLinkRepository() throws Exception { + context.turnOffAuthorisationSystem(); + + //** GIVEN ** + //1. A community-collection structure with one parent community and one collections. + parentCommunity = CommunityBuilder.createCommunity(context) + .withName("Parent Community") + .build(); + + Collection col1 = CollectionBuilder.createCollection(context, parentCommunity).withName("Collection 1").build(); + + //2. A public item with a bitstream + String bitstreamContent = "0123456789"; + + try (InputStream is = IOUtils.toInputStream(bitstreamContent, CharEncoding.UTF_8)) { + + Item publicItem1 = ItemBuilder.createItem(context, col1) + .withTitle("Public item 1") + .withIssueDate("2017-10-17") + .withAuthor("Smith, Donald").withAuthor("Doe, John") + .build(); + + bitstream = BitstreamBuilder + .createBitstream(context, publicItem1, is) + .withName("Test bitstream") + .withDescription("This is a bitstream to test range requests") + .withMimeType("text/plain") + .build(); + } + context.restoreAuthSystemState(); + + getClient() + .perform(get("/api/core/bitstreams/" + bitstream.getID() + "/checksum")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.activeStore.value", is(bitstream.getChecksum()))) + .andExpect(jsonPath("$.activeStore.checkSumAlgorithm", is(bitstream.getChecksumAlgorithm()))) + .andExpect(jsonPath("$.databaseChecksum.value", is(bitstream.getChecksum()))) + .andExpect(jsonPath("$.databaseChecksum.checkSumAlgorithm", + is(bitstream.getChecksumAlgorithm()))) + .andExpect(jsonPath("$.synchronizedStore.value", nullValue())) + .andExpect(jsonPath("$.synchronizedStore.checkSumAlgorithm", nullValue())); + + + } + } diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java index 36fc2f2aa131..9c9c0513c182 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/matcher/BitstreamMatcher.java @@ -102,7 +102,8 @@ public static Matcher matchFullEmbeds() { return matchEmbeds( "bundle", "format", - "thumbnail" + "thumbnail", + "checksum" ); } @@ -115,7 +116,8 @@ public static Matcher matchLinks(UUID uuid) { "content", "format", "self", - "thumbnail" + "thumbnail", + "checksum" ); }