From b5fe4c75944deea540b807021e0f88401878118f Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 17 Mar 2023 16:22:48 +0100 Subject: [PATCH 001/546] Fix placement of allowedApiCalls in example manifests allowedApiCalls should be at the top level, not inside toolParameters. --- .../external-tools/dynamicDatasetTool.json | 20 +++++++++---------- .../root/external-tools/fabulousFileTool.json | 18 ++++++++--------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index 47413c8a625..22dd6477cb4 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -14,14 +14,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDatasetJson", - "httpMethod":"GET", - "urlTemplate":"/api/v1/datasets/{datasetId}", - "timeOut":10 - } - ] - } + ] + }, + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 1c132576099..2b6a0b8e092 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -21,14 +21,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDataFile", - "httpMethod":"GET", - "urlTemplate":"/api/v1/access/datafile/{fileId}", - "timeOut":270 - } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] } From d76092c1ec57a835920b8fd10e6883299f8b6d3a Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 17 Mar 2023 16:24:41 +0100 Subject: [PATCH 002/546] Add missing break to DATASET case Without this it also evaluates the FILE case causing NPE when dataFile is accessed. 
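For illustration, a minimal sketch of the fall-through pattern this patch fixes, using simplified, hypothetical names (Scope, buildCallback, datasetId, fileId) rather than the actual ExternalToolHandler code:

    // Illustrative sketch only; in the real handler the FILE case dereferences dataFile,
    // which is null when the tool was invoked for a dataset.
    enum Scope { DATASET, FILE }

    static String buildCallback(Scope scope, Long datasetId, Long fileId) {
        String callback = null;
        switch (scope) {
        case DATASET:
            callback = "/api/v1/datasets/" + datasetId + "/toolparams";
            break; // the missing break: without it, control falls through into the FILE case
        case FILE:
            // fileId.toString() throws a NullPointerException when fileId is null,
            // mirroring the NPE on dataFile described above
            callback = "/api/v1/files/" + fileId.toString() + "/toolparams";
            break;
        }
        return callback;
    }

With the break in place, buildCallback(Scope.DATASET, 42L, null) returns the dataset callback; without it, the same call falls through and throws a NullPointerException at fileId.toString().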
--- .../harvard/iq/dataverse/externaltools/ExternalToolHandler.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 88a51017b75..dac046373ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -111,6 +111,7 @@ public String handleRequest(boolean preview) { case DATASET: callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + break; case FILE: callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" + dataFile.getId() + "/metadata/" + fileMetadata.getId() + "/toolparams/" From ecac37fbd64c83bfc8d045ae3204ab86dc7bc29d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 2 May 2023 10:52:13 -0400 Subject: [PATCH 003/546] initial Globus Store class with some quick test code --- pom.xml | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 655 ++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 34 +- .../iq/dataverse/settings/JvmSettings.java | 2 + 4 files changed, 680 insertions(+), 18 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java diff --git a/pom.xml b/pom.xml index 5f514819947..e5b191f0ed7 100644 --- a/pom.xml +++ b/pom.xml @@ -167,8 +167,13 @@ org.eclipse.microprofile.config microprofile-config-api - provided + + + org.apache.geronimo.config + geronimo-config-impl + 1.0 + jakarta.platform jakarta.jakartaee-api diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java new file mode 100644 index 00000000000..fe62e25ad6f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -0,0 +1,655 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import 
org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import javax.net.ssl.SSLContext; + +/** + * @author qqmyers + * @param what it stores + */ +/* + * Globus Overlay Driver + * + * StorageIdentifier format: + * :///// + */ +public class GlobusOverlayAccessIO extends StorageIO { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); + + private StorageIO baseStore = null; + private String path = null; + private String endpointWithBasePath = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); + } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } + + private void validatePath(String relPath) throws IOException { + try { + URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); + if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + + + @Override + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(getSizeFromGlobus()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromGlobus()); + } + } + + private long getSizeFromGlobus() { + throw new NotImplementedException(); + /* + long size = -1; + HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + try { + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); + switch (code) { + case 200: + Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (IOException e) { + logger.warning(e.getMessage()); + } + return size; + */ + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 
200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws IOException { + // Delete is best-effort - we tell the remote server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + default: + logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (getSizeFromGlobus() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } + + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); + if (secretKey == null) { + return endpointWithBasePath + "/" + path; + } else { + return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); + } + } else { + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + logger.info("base-uri is " + endpointWithBasePath); + if (endpointWithBasePath == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + } else { + try { + new URI(endpointWithBasePath); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if(dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + //remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if(index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + //THe base store needs the baseStoreIdentifier and not the relative URL + fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + try { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch(MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + private String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if(suffixIndex >=0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (this.getDvObject() instanceof Dataset) { + fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (this.getDvObject() instanceof DataFile) { + fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + }else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } + + protected static boolean isValidIdentifier(String driverId, String storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + try { + URI absoluteURI = new URI(baseUri + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + logger.warning(use.getLocalizedMessage()); + return false; + } + return true; + } + + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".base-store"); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + public static void main(String[] args) { + System.out.println("Running the main method"); + if (args.length > 0) { + System.out.printf("List of arguments: {}", Arrays.toString(args)); + } + System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + logger.info(JvmSettings.BASE_URI.lookup("globus")); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 66c6a4cc2ee..ee2b6779cba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,7 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); private StorageIO baseStore = null; - private String urlPath = null; + private String path = null; private String baseUrl = null; private static HttpClientContext localContext = HttpClientContext.create(); @@ -83,10 +83,10 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) this.setIsLocalFile(false); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(urlPath); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); - logger.fine("Base URL: " + urlPath); + logger.fine("Base URL: " + path); } public 
RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -94,14 +94,14 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(urlPath); - logger.fine("Base URL: " + urlPath); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Base URL: " + path); } - private void validatePath(String path) throws IOException { + private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + relPath); if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } @@ -182,7 +182,7 @@ public void open(DataAccessOption... options) throws IOException { private long getSizeFromHttpHeader() { long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + HttpHead head = new HttpHead(baseUrl + "/" + path); try { CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); @@ -224,12 +224,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -260,13 +260,13 @@ public void delete() throws IOException { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -275,7 +275,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -420,9 +420,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); if (secretKey == null) { - return baseUrl + "/" + urlPath; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 86130f5146e..4fb895f5adc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -47,6 +47,8 @@ public enum JvmSettings { // FILES SETTINGS SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), + FILES(SCOPE_FILES), + BASE_URI(FILES, "base-uri"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 2c4c927cc8f20d53ee1aaaf1979b793ee53f9b3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 May 2023 14:13:02 -0400 Subject: [PATCH 004/546] add token --- .../dataaccess/GlobusOverlayAccessIO.java | 171 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 2 files changed, 111 insertions(+), 61 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index fe62e25ad6f..050b9ddc176 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.FileNotFoundException; import java.io.IOException; @@ -31,6 +32,7 @@ import java.util.logging.Logger; import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; @@ -49,6 +51,7 @@ import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; +import javax.json.JsonObject; import javax.net.ssl.SSLContext; /** @@ -58,8 +61,8 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: - * :///// + * StorageIdentifier format: :///// */ public class GlobusOverlayAccessIO extends StorageIO { @@ -68,6 +71,7 @@ public class GlobusOverlayAccessIO extends StorageIO { private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; + private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -86,7 +90,7 @@ public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Relative path: " + path); } @@ -99,18 +103,17 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Relative path: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new 
URI(endpointWithBasePath + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -181,37 +184,64 @@ public void open(DataAccessOption... options) throws IOException { } } + // Call the Globus API to get the file size private long getSizeFromGlobus() { - throw new NotImplementedException(); - /* - long size = -1; - HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + // Construct Globus URL + URI absoluteURI = null; try { - CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); - - try { - int code = response.getStatusLine().getStatusCode(); - logger.fine("Response for HEAD: " + code); - switch (code) { - case 200: - Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); - logger.fine("Num headers: " + headers.length); - String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - logger.fine("Content-Length: " + sizeString); - size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - logger.fine("Found file size: " + size); - break; - default: - logger.warning("Response from " + head.getURI().toString() + " was " + code); - } - } finally { - EntityUtils.consume(response.getEntity()); + int filenameStart = path.lastIndexOf("/") + 1; + int pathStart = endpointWithBasePath.indexOf("/") + 1; + + String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + + path.substring(0, filenameStart); + String filename = path.substring(filenameStart); + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + HttpGet get = new HttpGet(absoluteURI); + String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + logger.info("Token is " + token); + get.addHeader("Authorization", "Bearer " + token); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + //Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + return (long) responseJson.getInt("size"); + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } + } catch (URISyntaxException e) { + // Should have been caught in validatePath + e.printStackTrace(); + } catch (ClientProtocolException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } catch (IOException e) { - logger.warning(e.getMessage()); + // TODO Auto-generated catch block + e.printStackTrace(); } - return size; - */ + return -1; + + /* + * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + + * path); try { CloseableHttpResponse response = + * getSharedHttpClient().execute(head, localContext); + * + * try { int code = response.getStatusLine().getStatusCode(); + * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] + * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " + * + headers.length); String sizeString = + * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + * logger.fine("Content-Length: " + sizeString); size = + * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + * logger.fine("Found file size: " + size); break; default: + * logger.warning("Response from " + head.getURI().toString() + " was " + code); + * } } finally { EntityUtils.consume(response.getEntity()); } } catch + * (IOException e) { logger.warning(e.getMessage()); } return size; + */ } @Override @@ -360,8 +390,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -411,7 +442,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -469,9 +500,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = 
System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -486,8 +518,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -497,12 +529,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -513,8 +545,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -530,37 +562,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -622,11 +658,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUri = System.getProperty("dataverse.files." 
+ driverId + ".base-uri"); try { URI absoluteURI = new URI(baseUri + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + if (!absoluteURI.normalize().toString().startsWith(baseUri)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); logger.warning(use.getLocalizedMessage()); return false; @@ -642,14 +678,27 @@ public static String getBaseStoreIdFor(String driverId) { public List cleanUp(Predicate filter, boolean dryRun) throws IOException { return baseStore.cleanUp(filter, dryRun); } - + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.base-store","file"); + System.setProperty("dataverse.files.file.type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + try { + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.getSizeFromGlobus()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 4fb895f5adc..eac8411c939 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -49,6 +49,7 @@ public enum JvmSettings { FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), BASE_URI(FILES, "base-uri"), + GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3c3378f5a3bf39eff13a582d0dc52a2a5549af8f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:25 -0400 Subject: [PATCH 005/546] start refactoring Globus bean --- .../dataaccess/GlobusOverlayAccessIO.java | 28 +++++++++----- .../iq/dataverse/globus/AccessToken.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 37 +++++++++++-------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 050b9ddc176..0d7c5458e14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.globus.AccessToken; +import 
edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -28,10 +30,8 @@ import java.util.Arrays; import java.util.List; import java.util.function.Predicate; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang3.NotImplementedException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -83,6 +83,8 @@ public class GlobusOverlayAccessIO extends StorageIO { private static boolean trustCerts = false; private int httpConcurrency = 4; + private String globusAccessToken = null; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -190,18 +192,19 @@ private long getSizeFromGlobus() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; - int pathStart = endpointWithBasePath.indexOf("/") + 1; - - String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + int pathStart = endpointWithBasePath.indexOf("/"); +logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); + logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - logger.info("Token is " + token); - get.addHeader("Authorization", "Bearer " + token); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { //Get reponse as string @@ -482,6 +485,8 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { @@ -692,8 +697,11 @@ public static void main(String[] args) { DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + + + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); logger.info("Size is " + gsio.getSizeFromGlobus()); } catch (IOException e) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 877fc68e4a1..c93e2c6aa94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -46,7 +46,7 @@ String getRefreshToken() { return refreshToken; } - ArrayList getOtherTokens() { + public ArrayList getOtherTokens() { return otherTokens; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9d80c5cc280..c2137dd1f47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -167,7 +167,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); + globusLogger.info("Start deleting permissions."); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); @@ -264,15 +265,21 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo return task; } - public AccessToken getClientToken() throws MalformedURLException { - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - URL url = new URL( - "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + public static AccessToken getClientToken(String globusBasicToken) { + URL url; AccessToken clientTokenUser = null; - if (result.status == 200) { - clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + + try { + url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + } catch (MalformedURLException e) { + // On a statically defined URL... 
+ e.printStackTrace(); } return clientTokenUser; } @@ -306,7 +313,7 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String globusB } - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -359,7 +366,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode } - private StringBuilder readResultJson(InputStream in) { + private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; try { @@ -378,7 +385,7 @@ private StringBuilder readResultJson(InputStream in) { return sb; } - private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + private static T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { if (sb != null) { Gson gson = null; if (namingPolicy) { @@ -420,7 +427,7 @@ public String getDirectory(String datasetId) { } - class MakeRequestResponse { + static class MakeRequestResponse { public String jsonResponse; public int status; @@ -451,7 +458,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) if (globusEndpoint.equals("") || globusBasicToken.equals("")) { return false; } - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); if (clientTokenUser == null) { logger.severe("Cannot get client token "); return false; @@ -908,7 +915,7 @@ private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); task = getTask(clientTokenUser, taskId, globusLogger); if (task != null) { From f14b75454a524fd8816d6f5367b0e15fbd0ded92 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:56 -0400 Subject: [PATCH 006/546] enable globus store main() to run - will revert --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e5b191f0ed7..4926f59f8a0 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ org.glassfish jakarta.json - provided + From 502e660fe342939a617edd6d17a425c83b5a269b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:22:46 -0400 Subject: [PATCH 007/546] suppress thumb generation after a failure --- .../edu/harvard/iq/dataverse/DvObject.java | 14 +++++ .../dataaccess/ImageThumbConverter.java | 55 ++++++++++++------- .../dataverse/ingest/IngestServiceBean.java | 4 +- .../V5.13.0.1__9506-track-thumb-failures.sql | 1 + 4 files changed, 54 insertions(+), 20 deletions(-) create mode 100644 src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..6cb3816e3f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -181,7 +181,20 @@ public boolean isPreviewImageAvailable() { public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } + + 
/** Indicates whether a previous attempt to generate a preview image has failed, regardless of size. + * If so, we won't want to try again every time the preview/thumbnail is requested for a view. + */ + private boolean previewsHaveFailed; + + public boolean isPreviewsHaveFailed() { + return previewsHaveFailed; + } + public void setPreviewsHaveFailed(boolean previewsHaveFailed) { + this.previewsHaveFailed = previewsHaveFailed; + } + public Timestamp getModificationTime() { return modificationTime; } @@ -462,6 +475,7 @@ public void setStorageIdentifier(String storageIdentifier) { */ public abstract boolean isAncestorOf( DvObject other ); + @OneToMany(mappedBy = "definitionPoint",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true) List roleAssignments; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 2b4aed3a9a5..eb08646454d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -48,6 +48,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; //import org.primefaces.util.Base64; @@ -110,15 +111,24 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (isThumbnailCached(storageIO, size)) { + logger.fine("Found cached thumbnail for " + file.getId()); return true; } - logger.fine("Checking for thumbnail, file type: " + file.getContentType()); - - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - return generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - return generatePDFThumbnail(storageIO, size); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + // Don't try to generate if there have been failures: + if (!file.isPreviewsHaveFailed()) { + boolean thumbnailGenerated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + thumbnailGenerated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + thumbnailGenerated = generatePDFThumbnail(storageIO, size); + } + if (!thumbnailGenerated) { + logger.fine("No thumbnail generated for " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } + return thumbnailGenerated; } return false; @@ -436,20 +446,27 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (cachedThumbnailChannel == null) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); - // try to generate, if not available: - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } + // try to generate, if not available and hasn't failed before + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); + if (!file.isPreviewsHaveFailed()) { + boolean generated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + generated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + generated = generatePDFThumbnail(storageIO, size); + } - if (generated) { - // try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; + if (!generated) { + // Record failure + logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } else { + // Success - try to open again: + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5a353453fe8..fbe2d7b38ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -292,7 +292,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } catch (IOException ioex) { logger.warning("Failed to save generated file " + generated.toString()); - } + //Shouldn't mark this file as having a preview after this. + dataFile.setPreviewImageAvailable(false); + } } // ... but we definitely want to delete it: diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql new file mode 100644 index 00000000000..9b12d27db91 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql @@ -0,0 +1 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file From 0fea5ccca11b2348429ddfee75e4bafc709c7473 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:25:38 -0400 Subject: [PATCH 008/546] refactor error --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index eb08646454d..254c334d655 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +459,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 8f5350ae0df4df60c55ff770259531935cb6ac9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 10:32:21 -0400 Subject: [PATCH 009/546] 
cache isThumb available --- .../iq/dataverse/ThumbnailServiceWrapper.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 6c8db8c124b..e2bb21c8a4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -49,6 +49,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectThumbnailsMap = new HashMap<>(); private Map dvobjectViewMap = new HashMap<>(); + private Map hasThumbMap = new HashMap<>(); private String getAssignedDatasetImage(Dataset dataset, int size) { if (dataset == null) { @@ -133,7 +134,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { if ((!((DataFile)result.getEntity()).isRestricted() || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) - && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) { + && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( (DataFile) result.getEntity(), @@ -159,6 +160,13 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { return null; } + public boolean isThumbnailAvailable(DataFile entity) { + if(!hasThumbMap.containsKey(entity.getId())) { + hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity)); + } + return hasThumbMap.get(entity.getId()); + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { @@ -295,7 +303,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } } - if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) { + if (isThumbnailAvailable(thumbnailImageFile)) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( thumbnailImageFile, size); @@ -323,6 +331,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { public void resetObjectMaps() { dvobjectThumbnailsMap = new HashMap<>(); dvobjectViewMap = new HashMap<>(); + hasThumbMap = new HashMap<>(); } From 8604eef7f470eade8dbf885ed42bc47407db74ff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:22:18 -0400 Subject: [PATCH 010/546] set thumb fail column --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 5 ++++- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..a5822828682 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1127,7 +1127,7 @@ public boolean isThumbnailAvailable (DataFile file) { } // If thumbnails are not even supported for this class of files, - // there's notthing to talk about: + // there's nothing to talk about: if (!FileUtil.isThumbnailSupported(file)) { return false; } @@ -1149,6 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); } return false; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 254c334d655..ab9294eea72 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -115,7 +115,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s return true; } - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { boolean thumbnailGenerated = false; From aeae8f4ddbb05794c177e9b1d33725e1ed7d7e2f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:50:49 -0400 Subject: [PATCH 011/546] use thumb wrapper in edit and view files --- src/main/webapp/editFilesFragment.xhtml | 4 ++-- src/main/webapp/file-info-fragment.xhtml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a4e635b8c14..af06b44e3bc 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -360,13 +360,13 @@
- - + #{fileMetadata.label} diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml index 33a8d2c3ca5..3e8e80d51e7 100644 --- a/src/main/webapp/file-info-fragment.xhtml +++ b/src/main/webapp/file-info-fragment.xhtml @@ -28,8 +28,8 @@
- - + From c4ad20bc4b67b93908e60b76a251240f4a6e2540 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 13:49:35 -0400 Subject: [PATCH 012/546] add api --- .../edu/harvard/iq/dataverse/api/Admin.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d219339add9..14c556e9caa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2321,4 +2321,26 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } + @DELETE + @Path("/clearThumbnailFailureFlag") + public Response clearThumbnailFailureFlag() { + em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + return ok("Thumnail Failure Flags cleared."); + } + + @DELETE + @Path("/clearThumbnailFailureFlag/{id}") + public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { + try { + DataFile df = findDataFileOrDie(fileId); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + deleteQuery.setParameter(1, df.getId()); + deleteQuery.executeUpdate(); + return ok("Thumnail Failure Flag cleared for file id=: " + df.getId() + "."); + } catch (WrappedResponse r) { + logger.info("Could not find file with the id: " + fileId); + return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); + } + } + } From 63e98b3b60a4baae98f1f88a282b97694929c443 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:16:47 -0400 Subject: [PATCH 013/546] make clearer --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index a5822828682..f41565c9449 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1149,11 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); } - + file.setPreviewsHaveFailed(true); + this.save(file); return false; } From 2671cb75effb5425d02b3e874c7525b7833dc533 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:25:58 -0400 Subject: [PATCH 014/546] update comment --- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 6cb3816e3f1..87619450133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -182,8 +182,11 @@ public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } - /** Indicates whether a previous attempt to generate a preview image has failed, regardless of size. - * If so, we won't want to try again every time the preview/thumbnail is requested for a view. + /** + * Indicates whether a previous attempt to generate a preview image has failed, + * regardless of size. 
This could be due to the file not being accessible, or a + * real failure in generating the thumbnail. In both cases, we won't want to try + * again every time the preview/thumbnail is requested for a view. */ private boolean previewsHaveFailed; From 19db99b1427700c9cc4ad462c0edd017e6dd5799 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:26:28 -0400 Subject: [PATCH 015/546] remove setting flag where datafile is not clearly being saved to db --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index ab9294eea72..921faba7989 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +458,6 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 156d025970eeb5223b6fd8343db09cafee057fed Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 016/546] fix non-merge-able error when recording thumb fail --- .../iq/dataverse/DataFileServiceBean.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index f41565c9449..880b2ea7dc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1142,17 +1142,17 @@ public boolean isThumbnailAvailable (DataFile file) { is more important... 
*/ - - if (ImageThumbConverter.isThumbnailAvailable(file)) { - file = this.find(file.getId()); - file.setPreviewImageAvailable(true); - this.save(file); - return true; - } - file.setPreviewsHaveFailed(true); - this.save(file); - return false; + file = this.find(file.getId()); + if (ImageThumbConverter.isThumbnailAvailable(file)) { + file.setPreviewImageAvailable(true); + this.save(file); + return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); + return false; + } } From 97aa46cb3e9bd2d424961e68e9d024216740c57f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 16:50:38 -0400 Subject: [PATCH 017/546] rename script --- ...humb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__9506-track-thumb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql rename to src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql From dbc36c9d938571a5b61156611c445d266fbafe76 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 17:06:19 -0400 Subject: [PATCH 018/546] refactor - remove duplicate code --- .../dataaccess/ImageThumbConverter.java | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 921faba7989..fb0785ffd7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,7 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } + return generateThumbnail(storageIO, size); + } + + private static boolean generateThumbnail(StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -131,7 +135,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } return false; - } // Note that this method works on ALL file types for which thumbnail @@ -446,25 +449,11 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); - if (!file.isPreviewsHaveFailed()) { - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } - - if (!generated) { - // Record failure - logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - } else { - // Success - try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; - } + if(generateThumbnail(storageIO, size)) { + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; } } From 0c8972304a43c25ed1de1c5cc6cc1c09ef419948 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 10:30:05 -0400 Subject: [PATCH 019/546] try ds logos as url requests --- .../iq/dataverse/ThumbnailServiceWrapper.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index e2bb21c8a4c..66f79472178 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -12,7 +13,8 @@ import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.File; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -21,6 +23,8 @@ import java.util.Base64; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; + import javax.ejb.EJB; import javax.enterprise.context.RequestScoped; import javax.faces.view.ViewScoped; @@ -36,6 +40,9 @@ @RequestScoped @Named public class ThumbnailServiceWrapper implements java.io.Serializable { + + private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); + @Inject PermissionsWrapper permissionsWrapper; @EJB @@ -214,7 +221,13 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + + String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + logger.fine("getDatasetCardImageAsBase64Url: " + url); + this.dvobjectThumbnailsMap.put(datasetId,url); + return url; +/* String cardImageUrl = null; StorageIO dataAccess = null; @@ -320,6 +333,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); return cardImageUrl; + */ } // it's the responsibility of the user - to make sure the search result From dc4b6ae5201af228b1b484c6dd430713f8728ccc Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 
17:19:41 -0400 Subject: [PATCH 020/546] set the datasetid for search cards --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 66f79472178..4c3778527d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -194,6 +194,7 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { return null; } Dataset dataset = (Dataset)result.getEntity(); + dataset.setId(result.getEntityId()); Long versionId = result.getDatasetVersionId(); From 546cfdf2048158320e76a9345e9ebc3caf7ca6c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 021/546] fix non-merge-able error when recording thumb fail --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 880b2ea7dc4..ec12480d28d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1148,11 +1148,10 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); - return false; } + file.setPreviewsHaveFailed(true); + this.save(file); + return false; } From d3a48dffdfaa56bba065b3c36a2b6469e4227c33 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 17:44:02 -0400 Subject: [PATCH 022/546] typo --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 4c3778527d7..8dda91fd6a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -223,7 +223,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo return null; } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; From f505428f12a5ead774642837bdb871deda34ee27 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 13:13:01 -0400 Subject: [PATCH 023/546] only send url if thumb should exist --- .../iq/dataverse/ThumbnailServiceWrapper.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 8dda91fd6a3..19c53ffa77e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import 
edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; @@ -222,6 +223,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); + return null; + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); From 2d177a60fe67df26bafad35cf237e048a21545ee Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:08:15 -0400 Subject: [PATCH 024/546] use inputStream.transferTo --- .../dataaccess/ImageThumbConverter.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index fb0785ffd7b..bd87c5541a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -223,30 +223,32 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } if (tempFilesRequired) { - ReadableByteChannel pdfFileChannel; - + //ReadableByteChannel pdfFileChannel; + InputStream inputStream = null; try { storageIO.open(); - //inputStream = storageIO.getInputStream(); - pdfFileChannel = storageIO.getReadChannel(); + inputStream = storageIO.getInputStream(); + //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; } File tempFile; - FileChannel tempFileChannel = null; + OutputStream outputStream = null; + //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); - tempFileChannel = new FileOutputStream(tempFile).getChannel(); + outputStream = new FileOutputStream(tempFile); + inputStream.transferTo(outputStream); - tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; } finally { - IOUtils.closeQuietly(tempFileChannel); - IOUtils.closeQuietly(pdfFileChannel); + IOUtils.closeQuietly(inputStream); + IOUtils.closeQuietly(outputStream); } sourcePdfFile = tempFile; } From 6540b5da0966addffa3a0a6a9d7e67735f89e237 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:42:29 -0400 Subject: [PATCH 025/546] add debug --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index bd87c5541a5..4a2b8ea0e6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -240,7 +240,8 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - inputStream.transferTo(outputStream); + long sz = inputStream.transferTo(outputStream); + logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { @@ -763,7 +764,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) try { fileSize = new File(fileLocation).length(); } catch (Exception ex) { - // + logger.warning("Can't open file: " + fileLocation); } if (fileSize == 0 || fileSize > sizeLimit) { From e202d0abc7395fe85218745510b32ade9b6ca770 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 16:15:58 -0400 Subject: [PATCH 026/546] more debug --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 4a2b8ea0e6d..3033269f3bc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,6 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { + logger.info("Couldn't find IM"); return false; } @@ -218,12 +219,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFilesRequired = true; } catch (IOException ioex) { + logger.warning(ioex.getMessage()); + ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { //ReadableByteChannel pdfFileChannel; + logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); @@ -241,7 +245,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); long sz = inputStream.transferTo(outputStream); - logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); + logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { From b9cd2bbf0c42fb4e7aada29d7cea817c195ca75d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:05 -0400 Subject: [PATCH 027/546] include failed preview flag in queries --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..0bd0a01aef1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -762,7 +762,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -796,7 +796,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 'application/pdf' " From ac5a9564848ba241a993e8e9252641820e9041b4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:59 -0400 Subject: [PATCH 028/546] use getThumbnailByVersionId --- .../iq/dataverse/ThumbnailServiceWrapper.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 19c53ffa77e..ff5e510e82c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -226,23 +226,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo DataFile thumbnailFile = dataset.getThumbnailFile(); if (thumbnailFile == null) { - thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); - if (thumbnailFile == null) { - logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); - return null; - } - } - if (thumbnailFile.isRestricted()) { - logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. 
Returning null."); - return null; + + // We attempt to auto-select via the optimized, native query-based method + // from the DatasetVersionService: + if (datasetVersionService.getThumbnailByVersionId(versionId) == null) { + return null; + } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; + /* String cardImageUrl = null; StorageIO dataAccess = null; From 98acd6b50af770779329de1201663d8599edf16a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:49:24 -0400 Subject: [PATCH 029/546] cleanup --- .../dataverse/dataaccess/ImageThumbConverter.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 3033269f3bc..458b8da227b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,7 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { - logger.info("Couldn't find IM"); + logger.fine("Couldn't find ImageMagick"); return false; } @@ -220,19 +220,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } catch (IOException ioex) { logger.warning(ioex.getMessage()); - ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { - //ReadableByteChannel pdfFileChannel; - logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); inputStream = storageIO.getInputStream(); - //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; @@ -240,14 +236,11 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s File tempFile; OutputStream outputStream = null; - //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - long sz = inputStream.transferTo(outputStream); - logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); - - //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //Reads/transfers all bytes from the input stream to the output stream. 
+ inputStream.transferTo(outputStream); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; From 610c65dc9ddd403041ee95475810db2977e57623 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 12:56:13 -0400 Subject: [PATCH 030/546] rename and cleanup --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../iq/dataverse/DataverseServiceBean.java | 45 ------- .../iq/dataverse/ThumbnailServiceWrapper.java | 117 +----------------- .../search/SearchIncludeFragment.java | 2 +- 4 files changed, 6 insertions(+), 160 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3d608153ba3..2ca1fb825f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -483,7 +483,7 @@ public String getThumbnailString() { thumbnailString = datasetThumbnail.getBase64image(); } else { - thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset, + thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset, workingVersion.getId(), !workingVersion.isDraft(), ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index e092f209acd..e99458fbc9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -346,51 +346,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - - /* - public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) { - if (dataverse == null) { - return false; - } - - // First, check if the dataverse has a defined logo: - - //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) { - File dataverseLogoFile = getLogo(dataverse); - if (dataverseLogoFile != null) { - String logoThumbNailPath = null; - - if (dataverseLogoFile.exists()) { - logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48); - if (logoThumbNailPath != null) { - return true; - } - } - } - //} - */ - // If there's no uploaded logo for this dataverse, go through its - // [released] datasets and see if any of them have card images: - // - // TODO: - // Discuss/Decide if we really want to do this - i.e., go through every - // file in every dataset below... - // -- L.A. 
4.0 beta14 - /* - for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) { - if (dataset != null) { - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - - if (releasedVersion != null) { - if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) { - return true; - } - } - } - } */ - /* - return false; - } */ private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index ff5e510e82c..c75c29ea094 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -177,7 +177,7 @@ public boolean isThumbnailAvailable(DataFile entity) { // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! - public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { + public String getDatasetCardImageAsUrl(SolrSearchResult result) { // Before we do anything else, check if it's a harvested dataset; // no need to check anything else if so (harvested datasets never have // thumbnails) @@ -199,10 +199,10 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { Long versionId = result.getDatasetVersionId(); - return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } - public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) { + public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) { Long datasetId = dataset.getId(); if (datasetId != null) { if (this.dvobjectThumbnailsMap.containsKey(datasetId)) { @@ -235,118 +235,9 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; - logger.fine("getDatasetCardImageAsBase64Url: " + url); + logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; - - -/* - String cardImageUrl = null; - StorageIO dataAccess = null; - - try{ - dataAccess = DataAccess.getStorageIO(dataset); - } - catch(IOException ioex){ - // ignore - } - - InputStream in = null; - // See if the dataset already has a dedicated thumbnail ("logo") saved as - // an auxilary file on the dataset level: - // (don't bother checking if it exists; just try to open the input stream) - try { - in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size); - //thumb48addedByImageThumbConverter); - } catch (Exception ioex) { - //ignore - } - - if (in != null) { - try { - byte[] bytes = IOUtils.toByteArray(in); - String base64image = Base64.getEncoder().encodeToString(bytes); - cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image; - this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl); - return cardImageUrl; - } catch (IOException ex) { - this.dvobjectThumbnailsMap.put(datasetId, ""); - return null; - // (alternatively, we could ignore the exception, and proceed with the - // regular process of selecting the thumbnail from the available - // image files - ?) 
- } finally - { - IOUtils.closeQuietly(in); - } - } - - // If not, see if the dataset has one of its image files already assigned - // to be the designated thumbnail: - cardImageUrl = this.getAssignedDatasetImage(dataset, size); - - if (cardImageUrl != null) { - //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl); - return cardImageUrl; - } - - // And finally, try to auto-select the thumbnail (unless instructed not to): - - if (!autoselect) { - return null; - } - - // We attempt to auto-select via the optimized, native query-based method - // from the DatasetVersionService: - Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId); - - if (thumbnailImageFileId != null) { - //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId; - if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+datasetId); - if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) { - return this.dvobjectThumbnailsMap.get(thumbnailImageFileId); - } - return null; - } - - DataFile thumbnailImageFile = null; - - if (dvobjectViewMap.containsKey(thumbnailImageFileId) - && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) { - thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId); - } else { - thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId); - if (thumbnailImageFile != null) { - // TODO: - // do we need this file on the map? - it may not even produce - // a thumbnail! - dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - return null; - } - } - - if (isThumbnailAvailable(thumbnailImageFile)) { - cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( - thumbnailImageFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } - - if (cardImageUrl != null) { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - } - } - - //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); - - return cardImageUrl; - */ } // it's the responsibility of the user - to make sure the search result diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index bfe397cf48c..99fe4cd979b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1302,7 +1302,7 @@ public void setDisplayCardValues() { result.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(result)); } else if (result.getType().equals("datasets")) { if (result.getEntity() != null) { - result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(result)); + result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsUrl(result)); } if (result.isHarvested()) { From 391504de43d8992e4b97d506fdfc763e512a8fc4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:46:35 -0400 Subject: [PATCH 031/546] api docs --- doc/sphinx-guides/source/api/native-api.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index b39cf91337a..24f6c0d4ced 100644 
--- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -4649,3 +4649,23 @@ A curl example using an ``ID`` curl -X POST -H 'Content-Type:application/json' -d "$JSON" $SERVER_URL/api/admin/feedback Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. + +.. _thumbnail_reset: + +Reset Thumbnail Failure Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If Dataverse attempts to create a thumbnail image for an image or pdf file and the attempt fails, Dataverse will set a flag for the file to avoid repeated attempts to generate the thumbnail. +For cases where the problem may have been temporary (or fixed in a later Dataverse release), two API calls exist to reset this flag for all files or for a given file. + +Curl examples + +.. code-block:: bash + + export SERVER_URL=http://localhost + export fileID=1234 + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag/$fileID + From de7963a0635646f6c00e1362fc87152029394839 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:53:30 -0400 Subject: [PATCH 032/546] refactor typo --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 458b8da227b..febf659b71a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,11 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } - return generateThumbnail(storageIO, size); + return generateThumbnail(file, storageIO, size); } - private static boolean generateThumbnail(StorageIO storageIO, int size) { + private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -449,7 +449,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - if(generateThumbnail(storageIO, size)) { + if(generateThumbnail(file, storageIO, size)) { try { cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); } catch (Exception ioEx) { From e73806a6907ec630d7b2389abda632727821f48e Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 27 Jul 2023 17:25:40 -0400 Subject: [PATCH 033/546] increase universe --- .../db/migration/V5.13.0.3__9728-universe-variablemetadata.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql new file mode 100644 index 00000000000..8e311c06b32 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql @@ -0,0 +1,2 @@ +-- increase field universe from 255 to text +ALTER TABLE variablemetadata ALTER COLUMN universe TYPE text; From 495594a2ed039b52951b7f1298426436b64a00f4 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 28 Jul 2023 10:50:22 -0400 Subject: [PATCH 034/546] column text --- .../edu/harvard/iq/dataverse/datavariable/VariableMetadata.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java index c18355c9979..08fcd14e0e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java @@ -71,6 +71,7 @@ public class VariableMetadata implements Serializable { /** * universe: metadata variable field. */ + @Column(columnDefinition="TEXT") private String universe; /** From be56f48f469ce319c1e3cacc4e14e5bbb9c0ecb9 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 28 Jul 2023 11:36:23 -0400 Subject: [PATCH 035/546] release note --- doc/release-notes/9728-universe-variablemetadata.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9728-universe-variablemetadata.md diff --git a/doc/release-notes/9728-universe-variablemetadata.md b/doc/release-notes/9728-universe-variablemetadata.md new file mode 100644 index 00000000000..66a2daf151b --- /dev/null +++ b/doc/release-notes/9728-universe-variablemetadata.md @@ -0,0 +1 @@ +universe field in variablemetadata table was changed from varchar(255) to text. The change was made to support longer strings in "universe" metadata field, similar to the rest of text fields in variablemetadata table. 
From 36d26d4b0ef9185869a006d78ca3be371dc19112 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 2 Aug 2023 11:52:39 -0400 Subject: [PATCH 036/546] update test cred --- .../harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0d7c5458e14..081c5a622aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -691,7 +691,7 @@ public static void main(String[] args) { } System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); System.setProperty("dataverse.files.globus.base-store","file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); From 4b755b50bfbe729570dde943c1809ef80b3b840f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Aug 2023 17:25:52 -0400 Subject: [PATCH 037/546] setting is GlobusAppUrl not ...URL --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8493702406b..a5579c82c6d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3926,7 +3926,7 @@ GlobusEndpoint is Globus endpoint id used with Globus integration. See :ref:`glo A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. -:GlobusAppURL +:GlobusAppUrl +++++++++++++ The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. From 4e6d948d712da42862b9f429d8ef65086a71baab Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 25 Aug 2023 10:00:30 -0400 Subject: [PATCH 038/546] remove req. 
that app and DV are on same host, note future todo --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 11 +++-------- .../iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dcd7eacf50b..b8165f0314f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3329,8 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo, - @Context HttpHeaders headers + @Context UriInfo uriInfo ) throws IOException, ExecutionException, InterruptedException { logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); @@ -3390,12 +3389,8 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } - String requestUrl = headers.getRequestHeader("origin").get(0); - - if(requestUrl.contains("localhost")){ - requestUrl = "http://localhost:8080"; - } - + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + // Async Call globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index c2137dd1f47..5c387710844 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -737,6 +737,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); + //ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of calling API + String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { // if(!taskSkippedFiles) From b5e47b98a08f25c1160fc651b84bc1fbefe3dfa4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:33 -0400 Subject: [PATCH 039/546] fix retrieveSize parsing, refactoring --- .../dataaccess/GlobusOverlayAccessIO.java | 169 ++++++------------ .../dataaccess/RemoteOverlayAccessIO.java | 82 ++++----- 2 files changed, 93 insertions(+), 158 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 081c5a622aa..6a22f8b68f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -4,14 +4,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -56,7 +54,6 @@ /** * 
@author qqmyers - * @param what it stores */ /* * Globus Overlay Driver @@ -64,14 +61,13 @@ * StorageIdentifier format: :///// */ -public class GlobusOverlayAccessIO extends StorageIO { +public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; - private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -117,103 +113,37 @@ private void validatePath(String relPath) throws IOException { } } - @Override - public void open(DataAccessOption... options) throws IOException { - - baseStore.open(options); - - DataAccessRequest req = this.getRequest(); - - if (isWriteAccessRequested(options)) { - isWriteAccess = true; - isReadAccess = false; - } else { - isWriteAccess = false; - isReadAccess = true; - } - - if (dvObject instanceof DataFile) { - String storageIdentifier = dvObject.getStorageIdentifier(); - - DataFile dataFile = this.getDataFile(); - - if (req != null && req.getParameter("noVarHeader") != null) { - baseStore.setNoVarHeader(true); - } - - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - // Fix new DataFiles: DataFiles that have not yet been saved may use this method - // when they don't have their storageidentifier in the final form - // So we fix it up here. ToDo: refactor so that storageidentifier is generated - // by the appropriate StorageIO class and is final from the start. - logger.fine("StorageIdentifier is: " + storageIdentifier); - - if (isReadAccess) { - if (dataFile.getFilesize() >= 0) { - this.setSize(dataFile.getFilesize()); - } else { - logger.fine("Setting size"); - this.setSize(getSizeFromGlobus()); - } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { - - List datavariables = dataFile.getDataTable().getDataVariables(); - String varHeaderLine = generateVariableHeader(datavariables); - this.setVarHeader(varHeaderLine); - } - - } - - this.setMimeType(dataFile.getContentType()); - - try { - this.setFileName(dataFile.getFileMetadata().getLabel()); - } catch (Exception ex) { - this.setFileName("unknown"); - } - } else if (dvObject instanceof Dataset) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else if (dvObject instanceof Dataverse) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromGlobus()); - } - } - // Call the Globus API to get the file size - private long getSizeFromGlobus() { + @Override + long retrieveSize() { // Construct Globus URL URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; int pathStart = endpointWithBasePath.indexOf("/"); -logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.info("endpointWithBasePath: " + endpointWithBasePath); String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - + logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { - //Get reponse as string + // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getInt("size"); + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { - logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); logger.info(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { @@ -227,24 +157,6 @@ private long getSizeFromGlobus() { e.printStackTrace(); } return -1; - - /* - * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + - * path); try { CloseableHttpResponse response = - * getSharedHttpClient().execute(head, localContext); - * - * try { int code = response.getStatusLine().getStatusCode(); - * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] - * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " - * + headers.length); String sizeString = - * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - * logger.fine("Content-Length: " + sizeString); size = - * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - * logger.fine("Found file size: " + size); break; default: - * logger.warning("Response from " + head.getURI().toString() + " was " + code); - * } } finally { EntityUtils.consume(response.getEntity()); } } catch - * (IOException e) { logger.warning(e.getMessage()); } return size; - */ } @Override @@ -417,7 +329,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromGlobus() != -1); + return (retrieveSize() != -1); } @Override @@ -485,9 +397,12 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." 
+ this.driverId + ".globus-token"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); @@ -527,7 +442,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -554,7 +469,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -640,21 +555,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); } @@ -689,24 +604,42 @@ public static void main(String[] args) { if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); + // System.setProperty("dataverse.files.globus.globus_client_id", + // "2791b83e-b989-47c5-a7fa-ce65fd949522"); + System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); - System.setProperty("dataverse.files.globus.base-store","file"); - System.setProperty("dataverse.files.file.type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + // 
System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); + System.setProperty("dataverse.files.globus.globus-token", + "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); + System.setProperty("dataverse.files.globus.base-store", "file"); + System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); - - - + logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.getSizeFromGlobus()); - + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( + "globus://1234///hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.retrieveSize()); + } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } + try { + DataFile df = new DataFile(); + Dataset ds = new Dataset(); + ds.setAuthority("10.5072"); + ds.setIdentifier("FK21234"); + df.setOwner(ds); + df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); + logger.info("Size2 is " + gsio.retrieveSize()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ee2b6779cba..710d7a38fb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -85,7 +85,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Base URL: " + path); } @@ -98,18 +98,17 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Base URL: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -150,7 +149,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); + this.setSize(retrieveSize()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -171,16 +170,14 @@ public void open(DataAccessOption... 
options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } } - private long getSizeFromHttpHeader() { + long retrieveSize() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -356,8 +353,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -379,7 +377,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); + return (retrieveSize() != -1); } @Override @@ -407,7 +405,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -422,8 +420,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -464,9 +461,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -481,8 +479,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -492,12 +490,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -508,8 +506,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -525,37 +523,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -617,11 +619,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); try { URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); return false; } From cec0b519948d8ba480f49f915dabd5f31e5c5082 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:48 -0400 Subject: [PATCH 040/546] add globus type --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d046fa4661d..f2eb0236df4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -48,6 +48,7 @@ public DataAccess() { public static final String S3 = "s3"; static final String SWIFT = "swift"; static final String REMOTE = "remote"; + static final String GLOBUS = "globus"; static final String TMP = "tmp"; public static final String SEPARATOR = "://"; //Default to "file" is for tests only @@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data return new SwiftAccessIO<>(dvObject, req, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId); case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); @@ -369,6 +372,8 @@ public static boolean isValidDirectStorageIdentifier(String storageId) { return S3AccessIO.isValidIdentifier(driverId, storageId); case REMOTE: return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + case GLOBUS: + return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId); default: logger.warning("Request to validate for storage driver: " + driverId); } From 555bf05af241c555300f5c528656de3d10b3c584 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 10:07:49 -0400 Subject: [PATCH 041/546] refactoring g store as a remotestore --- .../dataaccess/GlobusOverlayAccessIO.java | 370 +++--------------- .../dataaccess/RemoteOverlayAccessIO.java | 46 +-- .../iq/dataverse/settings/JvmSettings.java | 2 +- 3 files changed, 80 insertions(+), 338 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 6a22f8b68f3..16345cd1f9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,28 +58,26 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: :///// + * Remote: + * StorageIdentifier format: ://// + * Storage location: / + * Internal + * StorageIdentifier format: ://// + * Storage location: /// + * + * baseUrl: globus:// + */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; 
- private String endpointWithBasePath = null; - - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; private String globusAccessToken = null; + /* + * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + */ + private boolean isDataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -104,9 +102,9 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); - if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { - throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + URI absoluteURI = new URI(baseUrl + "/" + relPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); @@ -120,13 +118,24 @@ long retrieveSize() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") - + path.substring(0, filenameStart); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); logger.info("directoryPath: " + directoryPath); + + if(isDataverseManaged) { + Dataset ds = ((DataFile) dvObject).getOwner(); + directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + logger.info("directoryPath now: " + directoryPath); + + } + if(filenameStart > 0) { + directoryPath = directoryPath + path.substring(0, filenameStart); + } + logger.info("directoryPath finally: " + directoryPath); String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); @@ -138,7 +147,7 @@ long retrieveSize() { if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { @@ -159,63 +168,26 @@ long retrieveSize() { return -1; } - @Override - public InputStream getInputStream() throws IOException { - if (super.getInputStream() == null) { - try { - HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); - CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); - int code = response.getStatusLine().getStatusCode(); - switch (code) { - case 200: - setInputStream(response.getEntity().getContent()); - break; - default: - logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - e.printStackTrace(); - throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); - } - setChannel(Channels.newChannel(super.getInputStream())); - } - return super.getInputStream(); - } - - @Override - public Channel getChannel() throws IOException { - if (super.getChannel() == null) { - getInputStream(); - } - return channel; - } - - @Override - public ReadableByteChannel getReadChannel() throws IOException { - // Make sure StorageIO.channel variable exists - getChannel(); - return super.getReadChannel(); - } @Override public void delete() throws IOException { + +// Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -224,7 +196,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -233,146 +205,20 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. - */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - - @Override - public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); - } - - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSize() != -1); - } - @Override - public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); - } - @Override - public 
OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); - } - - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - - @Override - public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - - public boolean downloadRedirectEnabled(String auxObjectTag) { - return baseStore.downloadRedirectEnabled(auxObjectTag); - } @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { - +//Fix // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); if (secretKey == null) { - return endpointWithBasePath + "/" + path; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { @@ -380,35 +226,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); + isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); - logger.info("base-uri is " + endpointWithBasePath); - if (endpointWithBasePath == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + logger.info("base-url is " + baseUrl); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); } else { try { - new URI(endpointWithBasePath); + new URI(baseUrl); } catch (Exception e) { logger.warning( "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); @@ -442,9 +274,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -469,9 +301,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -488,97 +320,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. - private String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - - @Override - public void savePath(Path fileSystemPath) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); - - } protected static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { - URI absoluteURI = new URI(baseUri + "/" + urlPath); - if (!absoluteURI.normalize().toString().startsWith(baseUri)) { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } @@ -590,14 +338,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".base-store"); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } public static void main(String[] args) { System.out.println("Running the main method"); @@ -606,7 +346,7 @@ public static void main(String[] args) { } // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); - System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); + System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", @@ -614,7 +354,7 @@ public static void main(String[] args) { System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URI.lookup("globus")); + logger.info(JvmSettings.BASE_URL.lookup("globus")); logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 710d7a38fb4..6b15bcf1dc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -52,31 +52,32 @@ /** * @author qqmyers - * @param what it stores */ /* * Remote Overlay Driver * * 
StorageIdentifier format: - * ://// + * ://// + * + * baseUrl: http(s):// */ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; - private String baseUrl = null; + protected StorageIO baseStore = null; + protected String path = null; + protected String baseUrl = null; - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + protected PoolingHttpClientConnectionManager cm = null; CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -86,7 +87,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -96,7 +97,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } private void validatePath(String relPath) throws IOException { @@ -420,7 +421,8 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -483,9 +485,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -510,9 +512,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -533,7 +535,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific // prefix required. - private String getStoragePath() throws IOException { + protected String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); @@ -596,21 +598,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 451dbcc56d1..ffe08a6afb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), - BASE_URI(FILES, "base-uri"), + BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS From 270e0fd0a28b516f62dc29e927bbb19753f47d19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:08:33 -0400 Subject: [PATCH 042/546] temporary fix for local compile issues --- .../harvest/server/web/servlet/OAIServlet.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 9cf1629abfc..3ce88fdf204 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -74,18 +74,13 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; - - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") - private Integer maxListIdentifiers; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") - private Integer maxListSets; + //Todo - revert this change - added to get past some local compile issues + private Integer 
maxListIdentifiers=100; + + private Integer maxListSets=100; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") - private Integer maxListRecords; + private Integer maxListRecords=10; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist From 1828855a162683d564e02507ce60fd99963b43d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:09:53 -0400 Subject: [PATCH 043/546] updates/fixes re: extending RemoteOverlay, etc. --- .../iq/dataverse/dataaccess/DataAccess.java | 2 + .../dataaccess/GlobusOverlayAccessIO.java | 208 +++++++----------- .../dataaccess/RemoteOverlayAccessIO.java | 9 +- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 52 +++-- 5 files changed, 119 insertions(+), 154 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index f2eb0236df4..8387f8110cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -132,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new SwiftAccessIO<>(storageLocation, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 16345cd1f9c..b00724e2825 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,46 +58,63 @@ /* * Globus Overlay Driver * - * Remote: - * StorageIdentifier format: ://// - * Storage location: / - * Internal - * StorageIdentifier format: ://// - * Storage location: /// + * Remote: StorageIdentifier format: + * ://// Storage location: + * / Internal StorageIdentifier format: + * :// Storage location: + * /// * * baseUrl: globus:// - + * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - - private String globusAccessToken = null; + String globusAccessToken = null; /* - * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + * If this is set to true, the store supports Globus transfer in and + * Dataverse/the globus app manage file locations, access controls, deletion, + * etc. 
*/ - private boolean isDataverseManaged = false; + private boolean dataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - this.setIsLocalFile(false); - configureStores(req, driverId, null); - logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(path); + if (dvObject instanceof DataFile) { + globusAccessToken = retrieveGlobusAccessToken(); + } + dataverseManaged = isDataverseManaged(this.driverId); - logger.fine("Relative path: " + path); + logger.info("GAT3: " + globusAccessToken); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - super(null, null, driverId); - this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); + this.driverId = driverId; + this.dataverseManaged = isDataverseManaged(this.driverId); + if (dataverseManaged) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } +//ToDo - only when needed? + globusAccessToken = retrieveGlobusAccessToken(); + + } + + private String retrieveGlobusAccessToken() { + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); } private void validatePath(String relPath) throws IOException { @@ -114,6 +131,7 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override long retrieveSize() { + logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -121,16 +139,16 @@ long retrieveSize() { String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); + String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("directoryPath: " + directoryPath); - if(isDataverseManaged) { + if (dataverseManaged && (dvObject!=null)) { Dataset ds = ((DataFile) dvObject).getOwner(); directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); logger.info("directoryPath now: " + directoryPath); } - if(filenameStart > 0) { + if (filenameStart > 0) { directoryPath = directoryPath + path.substring(0, filenameStart); } logger.info("directoryPath finally: " + directoryPath); @@ -168,12 +186,15 @@ long retrieveSize() { return -1; } - - - + + @Override + public InputStream getInputStream() throws IOException { + throw new IOException("Not implemented"); + } + @Override public void delete() throws IOException { - + // Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call @@ -205,9 +226,6 @@ public void delete() throws IOException { } - - - @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { @@ -218,114 +236,37 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." + driverId + ".managed"); + } - AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); - globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); - logger.info("base-url is " + baseUrl); + static boolean isValidIdentifier(String driverId, String storageId) { + String baseIdentifier = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); if (baseUrl == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - + return false; } - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + // Internally managed endpoints require standard name pattern (submitted via + // /addFile(s) api) + if (isDataverseManaged(driverId)) { + boolean hasStandardName = usesStandardNamePattern(baseIdentifier); + if (hasStandardName) { + return true; } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); + logger.warning("Unacceptable identifier pattern in submitted identifier: " + baseIdentifier); + return false; } } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + // Remote endpoints require a valid URI within the baseUrl try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } - - - protected static boolean isValidIdentifier(String driverId, String storageId) { - String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); - try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + baseIdentifier); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; @@ -338,7 +279,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -347,15 +287,19 @@ public static void main(String[] args) { // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + + System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URL.lookup("globus")); - logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + // logger.info(JvmSettings.BASE_URL.lookup("globus")); + // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 6b15bcf1dc8..a9653f2ab68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,6 +65,8 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + String globusAccessToken = null; + protected StorageIO baseStore = null; protected String path = null; protected String baseUrl = null; @@ -79,6 +81,9 @@ public class RemoteOverlayAccessIO extends StorageIO { protected static boolean trustCerts = false; protected int httpConcurrency = 4; + public RemoteOverlayAccessIO() { + } + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -445,7 +450,7 @@ int getUrlExpirationMinutes() { return 60; } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { baseUrl = 
System.getProperty("dataverse.files." + this.driverId + ".base-url"); if (baseUrl == null) { throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); @@ -616,7 +621,7 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index bfd5c5f0d8f..333d72e09b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -611,7 +611,7 @@ public static boolean isDirectUploadEnabled(String driverId) { //Check that storageIdentifier is consistent with store's config //False will prevent direct uploads - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5c387710844..d98e1c9b7f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -594,11 +594,10 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Starting an globusUpload "); - String datasetIdentifier = dataset.getStorageIdentifier(); - + // ToDo - use DataAccess methods? 
- String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); Thread.sleep(5000); @@ -670,18 +669,26 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin JsonArray filesJsonArray = jsonObject.getJsonArray("files"); if (filesJsonArray != null) { + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + //or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); @@ -690,7 +697,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - +logger.info("Size: " + newfilesJsonArray.size()); +logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -699,15 +707,21 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String fileName = fileJsonObject.getString("fileName"); String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; - + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + logger.info("List Size: " + newfileJsonObject.size()); + //if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { JsonPatch path = Json.createPatchBuilder() 
.add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); fileJsonObject = path.apply(fileJsonObject); @@ -716,11 +730,11 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin fileJsonObject = path.apply(fileJsonObject); jsonDataSecondAPI.add(fileJsonObject); countSuccess++; - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing values "); + // countError++; + // } } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -1045,8 +1059,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath - + ") does not appear to be an S3 object associated with driver: "); + globusLogger.info("DataFile (fullPAth " + fullPath + + ") does not appear to be accessible withing Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); From 3d2255b963f869028b68576075462664f67a5888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 13 Sep 2023 18:35:40 +0200 Subject: [PATCH 044/546] Assign roles from email address Give a user a role from email address of the user's account --- .../iq/dataverse/authorization/users/AuthenticatedUser.java | 3 ++- src/main/webapp/roles-assign.xhtml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 89429b912f6..17db9e63e8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -64,7 +64,8 @@ @NamedQuery( name="AuthenticatedUser.filter", query="select au from AuthenticatedUser au WHERE (" + "LOWER(au.userIdentifier) like LOWER(:query) OR " - + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query))"), + + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query) or " + + "lower(au.email) like lower(:query))"), @NamedQuery( name="AuthenticatedUser.findAdminUser", query="select au from AuthenticatedUser au WHERE " + "au.superuser = true " diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml index 4b31f10dbfc..4b355c74d5c 100644 --- a/src/main/webapp/roles-assign.xhtml +++ b/src/main/webapp/roles-assign.xhtml @@ -31,7 +31,8 @@ styleClass="DropdownPopup" panelStyleClass="DropdownPopupPanel" var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter"> - + + From ae16dadddd7978dae23dd62671c05433db2f9300 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:13:52 -0400 Subject: [PATCH 045/546] minor cleanup --- .../iq/dataverse/globus/GlobusServiceBean.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 56219f843a7..9aae4dffc03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -130,7 +130,7 @@ ArrayList checkPermisions(AccessToken clientTokenUser, String directory, return ids; } - +/* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -163,8 +163,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin count++; } } - - public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { +*/ + public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); @@ -554,7 +554,9 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) + rawStorageId + "&fileName=" + df.getCurrentName(); } } - return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + String finalUrl = tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + logger.info("Calling app: " + finalUrl); + return finalUrl; } public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @@ -624,7 +626,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } // If success, switch to an EditInProgress lock - do this before removing the @@ -897,7 +899,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { From 9562b788b7dfbfec53d6d7e9aeb52e690cddddf4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:14:43 -0400 Subject: [PATCH 046/546] start allowupload method, fix messaging when disabled --- .../harvard/iq/dataverse/api/Datasets.java | 63 ++++++++++++++++++- src/main/java/propertyFiles/Bundle.properties | 3 + 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 599890913fd..a999a71b2d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3393,6 +3393,65 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** Requests permissions for a given globus user to upload to the dataset + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ + @POST + @AuthRequired + @Path("{id}/allowGlobusUpload") + @Consumes(MediaType.APPLICATION_JSON) + public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + ) throws IOException, ExecutionException, InterruptedException { + + + logger.info(" ==== (api allowGlobusUpload) jsonBody ====== " + jsonBody); + + + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } + + // 
------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + User authUser; + authUser = getRequestUser(crc); + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // Async Call + globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + + return ok("Async call to Globus Download started"); + + } + + /** Monitors a globus download and removes permissions on the dir/dataset when done + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired @Path("{id}/deleteglobusRule") @@ -3404,8 +3463,8 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + if (!systemConfig.isGlobusDownload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); } // ------------------------------------- diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 997f0470cc3..0343e109e61 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2623,6 +2623,9 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. + #Dataverses.java From c6197b3bf23ad1dccb023ea668799e7a79805d93 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Mon, 18 Sep 2023 10:40:05 -0400 Subject: [PATCH 047/546] #9920 support Postgres 16 --- pom.xml | 4 ++-- scripts/installer/install.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7ba22d2a076..c5b7fc302f3 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ war 1.2.18.4 - 9.21.2 + 9.22.1 1.20.1 0.8.7 5.2.1 @@ -790,7 +790,7 @@ true docker-build - 13 + 16 gdcc/dataverse:${app.image.tag} unstable diff --git a/scripts/installer/install.py b/scripts/installer/install.py index 5a7b9f75696..18995695638 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -422,9 +422,13 @@ conn.close() if int(pg_major_version) >= 15: + admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'" + conn = psycopg2.connect(admin_conn_string) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";" - print("PostgreSQL 15 or higher detected. Running " + conn_cmd) try: + print("PostgreSQL 15 or higher detected. 
Running " + conn_cmd) cur.execute(conn_cmd) except: if force: From 116845c753a8364d14bad2edafcebf6a0e28dde6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 19 Sep 2023 15:09:11 -0400 Subject: [PATCH 048/546] refactoring, add allowUpload api call --- .../harvard/iq/dataverse/api/Datasets.java | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 2 +- .../iq/dataverse/globus/GlobusEndpoint.java | 31 ++++++ .../dataverse/globus/GlobusServiceBean.java | 104 ++++++++++++------ 4 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a999a71b2d4..745f294fee6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3434,11 +3434,14 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); // Async Call - globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + globusService.givePermission("identity", principal, "rw", dataset); - return ok("Async call to Globus Download started"); + return ok("Permission Granted"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index b18e6bb7e76..965dc3c0947 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -214,7 +214,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - private static boolean isDataverseManaged(String driverId) { + public static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java new file mode 100644 index 00000000000..d1e5d19a592 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -0,0 +1,31 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusEndpoint { + + private String id; + private String clientToken; + private String basePath; + + + public GlobusEndpoint(String id, String clientToken, String basePath) { + + } + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } + public String getClientToken() { + return clientToken; + } + public void setClientToken(String clientToken) { + this.clientToken = clientToken; + } + public String getBasePath() { + return basePath; + } + public void setBasePath(String basePath) { + this.basePath = basePath; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9aae4dffc03..910ee796e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -46,6 +46,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -106,23 +107,23 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, - String principalType, String principal) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + endpoint.getClientToken(), "GET", null); ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { ids.add(pr.getId()); } else { - logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + logger.info(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } @@ -185,24 +186,24 @@ public void deletePermission(String ruleId, Logger globusLogger) throws Malforme } - public int givePermission(String principalType, String principal, String perm, AccessToken 
clientTokenUser, - String directory, String globusEndpoint) throws MalformedURLException { + public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + ArrayList rules = checkPermissions(endpoint, principalType, principal); Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/"); + permissions.setPath(endpoint.getBasePath() + "/"); permissions.setPermissions(perm); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; if (rules.size() == 0) { logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); if (result.status == 400) { @@ -214,9 +215,9 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } else { logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access/" + rules.get(0)); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "PUT", gson.toJson(permissions)); if (result.status == 400) { @@ -438,36 +439,25 @@ static class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) + private MakeRequestResponse findDirectory(String directory, String clientToken, String globusEndpoint) throws MalformedURLException { URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" + directory + "/"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + clientToken, "GET", null); logger.info("find directory status:" + result.status); return result; } - public boolean giveGlobusPublicPermissions(String datasetId) + public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - if (globusEndpoint.equals("") || globusBasicToken.equals("")) { - return false; - } - AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - if (clientTokenUser == null) { - logger.severe("Cannot get client token "); - return false; - } + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - String directory = getDirectory(datasetId); - logger.info(directory); - MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + 
MakeRequestResponse status = findDirectory(endpoint.getBasePath(), endpoint.getClientToken(), endpoint.getId()); if (status.status == 200) { @@ -485,8 +475,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } */ - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, - globusEndpoint); + int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { logger.info("Permissions already exist or limit was reached"); @@ -1287,4 +1276,55 @@ public String calculatemime(String fileName) throws InterruptedException { * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } * */ + + GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + Dataset dataset = null; + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + dataset = (Dataset) dvObject.getOwner(); + } else { + throw new IllegalArgumentException("Unsupported DvObject type: " + dvObject.getClass().getName()); + } + String driverId = dataset.getEffectiveStorageDriverId(); + GlobusEndpoint endpoint = null; + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); + + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("directoryPath: " + directoryPath); + + if (GlobusOverlayAccessIO.isDataverseManaged(driverId) && (dataset!=null)) { + directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + logger.info("directoryPath now: " + directoryPath); + + } else { + //remote store - may have path in file storageidentifier + String relPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + int filenameStart = relPath.lastIndexOf("/") + 1; + if (filenameStart > 0) { + directoryPath = directoryPath + relPath.substring(0, filenameStart); + } + } + logger.info("directoryPath finally: " + directoryPath); + + String endpointId = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + + logger.info("endpointId: " + endpointId); + + String globusToken = System.getProperty("dataverse.files." + driverId + ".globus-token"); + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); + + endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); + + return endpoint; + } + + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); + } } From c0dacb50fb117f01639b22bae6b404c6cc71596b Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 20 Sep 2023 16:59:25 +0200 Subject: [PATCH 049/546] #9940 - fixed various issues with generated urls of authors for signposting --- .../dataverse/util/SignpostingResources.java | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java index 2c9b7167059..19e1c1298ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -19,6 +19,8 @@ Two configurable options allow changing the limit for the number of authors or d import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; +import org.apache.commons.validator.routines.UrlValidator; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -164,12 +166,11 @@ private List getAuthorURLs(boolean limit) { for (DatasetAuthor da : workingDatasetVersion.getDatasetAuthors()) { logger.fine(String.format("idtype: %s; idvalue: %s, affiliation: %s; identifierUrl: %s", da.getIdType(), da.getIdValue(), da.getAffiliation(), da.getIdentifierAsUrl())); - String authorURL = ""; - authorURL = getAuthorUrl(da); + String authorURL = getAuthorUrl(da); if (authorURL != null && !authorURL.isBlank()) { // return empty if number of visible author more than max allowed // >= since we're comparing before incrementing visibleAuthorCounter - if (visibleAuthorCounter >= maxAuthors) { + if (limit && visibleAuthorCounter >= maxAuthors) { authorURLs.clear(); break; } @@ -211,15 +212,22 @@ private String getAuthorsAsString(List datasetAuthorURLs) { * */ private String getAuthorUrl(DatasetAuthor da) { - String authorURL = ""; - //If no type and there's a value, assume it is a URL (is this reasonable?) 
- //Otherise, get the URL using the type and value - if (da.getIdType() != null && !da.getIdType().isBlank() && da.getIdValue()!=null) { - authorURL = da.getIdValue(); - } else { - authorURL = da.getIdentifierAsUrl(); + + final String identifierAsUrl = da.getIdentifierAsUrl(); + // First, try to get URL using the type and value + if(identifierAsUrl != null) { + return identifierAsUrl; } - return authorURL; + + final String idValue = da.getIdValue(); + UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"}); + // Otherwise, try to use idValue as url if it's valid + if(urlValidator.isValid(idValue)) { + return idValue; + } + + // No url found + return null; } private JsonArrayBuilder getJsonAuthors(List datasetAuthorURLs) { From 9d846d2455e820cc9312863079086c66b0799c7a Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:13 +0200 Subject: [PATCH 050/546] fix: require ManageDatasetPermissions for listing role assignments on datasets --- .../engine/command/impl/ListRoleAssignments.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java index 1858ba377ab..b619d32cc7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java @@ -6,16 +6,18 @@ import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Collections; /** * * @author michael */ -@RequiredPermissions( Permission.ManageDataversePermissions ) +// no annotations here, since permissions are dynamically decided public class ListRoleAssignments extends AbstractCommand> { private final DvObject definitionPoint; @@ -34,5 +36,12 @@ public List execute(CommandContext ctxt) throws CommandException } return ctxt.permissions().assignmentsOn(definitionPoint); } + + @Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + definitionPoint.isInstanceofDataset() ? 
Collections.singleton(Permission.ManageDatasetPermissions) + : Collections.singleton(Permission.ManageDataversePermissions)); + } } From 41e363e343861f6b416e6add60e60778f697cce0 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:36 +0200 Subject: [PATCH 051/546] test: require ManageDatasetPermissions for listing role assignments on datasets --- scripts/api/data/role-contributor-plus.json | 12 +++ .../harvard/iq/dataverse/api/DatasetsIT.java | 87 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json new file mode 100644 index 00000000000..ef9ba3aaff6 --- /dev/null +++ b/scripts/api/data/role-contributor-plus.json @@ -0,0 +1,12 @@ +{ + "alias":"contributorPlus", + "name":"ContributorPlus", + "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", + "permissions":[ + "ViewUnpublishedDataset", + "EditDataset", + "DownloadFile", + "DeleteDatasetDraft", + "ManageDatasetPermissions" + ] +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 3b6d4d1ecdf..b51d400d2d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1296,6 +1296,93 @@ public void testAddRoles(){ } + @Test + public void testListRoleAssignments() { + Response createAdminUser = UtilIT.createRandomUser(); + String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser); + String adminApiToken = UtilIT.getApiTokenFromResponse(createAdminUser); + UtilIT.makeSuperUser(adminUsername); + + Response createDataverseResponse = UtilIT.createRandomDataverse(adminApiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Now, let's allow anyone with a Dataverse account (any "random user") + // to create datasets in this dataverse: + + Response grantRole = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, AuthenticatedUsers.get().getIdentifier(), adminApiToken); + grantRole.prettyPrint(); + assertEquals(OK.getStatusCode(), grantRole.getStatusCode()); + + Response createContributorUser = UtilIT.createRandomUser(); + String contributorUsername = UtilIT.getUsernameFromResponse(createContributorUser); + String contributorApiToken = UtilIT.getApiTokenFromResponse(createContributorUser); + + // First, we test listing role assignments on a dataverse which requires "ManageDataversePermissions" + + Response notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + Response roleAssignmentsOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, adminApiToken); + roleAssignmentsOnDataverse.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataverse.getStatusCode()); + + // Second, we test listing role assignments on a dataset which requires "ManageDatasetPermissions" + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, contributorApiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + 
logger.info("dataset id: " + datasetId); + + Response datasetAsJson = UtilIT.nativeGet(datasetId, adminApiToken); + datasetAsJson.then().assertThat() + .statusCode(OK.getStatusCode()); + + String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + assertEquals(10, identifier.length()); + + String protocol1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.protocol"); + String authority1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.authority"); + String identifier1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + String datasetPersistentId = protocol1 + ":" + authority1 + "/" + identifier1; + + Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); + + // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments + // of a dataset and assign it to the contributor user + + String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; + Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); + addDataverseRoleResponse.prettyPrint(); + String body = addDataverseRoleResponse.getBody().asString(); + String status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + giveRandoPermission.prettyPrint(); + assertEquals(200, giveRandoPermission.getStatusCode()); + + // Contributor user should now be able to list dataset role assignments as well + + Response roleAssignmentsOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + roleAssignmentsOnDataset.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataset.getStatusCode()); + + // ...but not dataverse role assignments + + notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + // Finally, we clean up and delete the role we created + + Response deleteDataverseRoleResponse = UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); + deleteDataverseRoleResponse.prettyPrint(); + body = deleteDataverseRoleResponse.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + } + @Test public void testFileChecksum() { From a8883981daa5d84d4553150804fe59942886d069 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 28 Sep 2023 13:36:19 +0200 Subject: [PATCH 052/546] always_add_validity_field_to_solr_doc --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..04bc824c4b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -811,9 +811,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Tue, 3 Oct 2023 17:03:38 -0400 Subject: [PATCH 
053/546] update auth checks and err handling --- .../harvard/iq/dataverse/api/Datasets.java | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 110bfcc1553..25839544ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -65,6 +65,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -3429,8 +3430,12 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar // ------------------------------------- // (1) Get the user from the ContainerRequestContext // ------------------------------------- - User authUser; - authUser = getRequestUser(crc); + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } // ------------------------------------- // (2) Get the Dataset Id @@ -3442,14 +3447,32 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } - - JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - // Async Call - globusService.givePermission("identity", principal, "rw", dataset); - - return ok("Permission Granted"); + if(!GlobusOverlayAccessIO.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + return badRequest("This dataset does not have managed Globus storage"); + } + + if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) + .canIssue(UpdateDatasetVersionCommand.class)) { + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); + + // Async Call + int status = globusService.givePermission("identity", principal, "rw", dataset); + switch (status) { + case 201: + return ok("Permission Granted"); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + } else { + return forbidden("User doesn't have permission to upload to this dataset"); + } } From c724094dcfffaa83c61f415d572e2e5a8958cef0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:03:57 -0400 Subject: [PATCH 054/546] fix constructor, reformat --- .../iq/dataverse/globus/GlobusEndpoint.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java index d1e5d19a592..7e555935e2e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -5,27 +5,34 @@ public class GlobusEndpoint { private String id; private String clientToken; private String basePath; - - + public GlobusEndpoint(String id, String clientToken, String basePath) { - + this.id = id; + this.clientToken = clientToken; + this.basePath = 
basePath; } - public String getId() { + + public String getId() { return id; } - public void setId(String id) { + + public void setId(String id) { this.id = id; } - public String getClientToken() { + + public String getClientToken() { return clientToken; } - public void setClientToken(String clientToken) { + + public void setClientToken(String clientToken) { this.clientToken = clientToken; } - public String getBasePath() { + + public String getBasePath() { return basePath; } - public void setBasePath(String basePath) { + + public void setBasePath(String basePath) { this.basePath = basePath; } } \ No newline at end of file From ed87e0640788278b5af838ba98efd72413d2586d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:04:51 -0400 Subject: [PATCH 055/546] start to monitor access rule changes --- .../dataverse/globus/GlobusServiceBean.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 910ee796e0e..ad20b90971b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -206,10 +206,21 @@ public int givePermission(String principalType, String principal, String perm, D result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); - if (result.status == 400) { + switch (result.status) { + case 400: + logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { + break; + case 409: logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + break; + case 201: + JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); + if (response != null && response.containsKey("access_id")) { + permissions.setId(response.getString("access_id")); + monitorTemporaryPermissions(permissions, endpoint); + logger.info("Access rule " + permissions.getId() + " was created successfully"); + } } return result.status; @@ -226,9 +237,13 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); } logger.info("Result status " + result.status); + return result.status; } + } - return result.status; + private void monitorTemporaryPermissions(Permissions permissions, GlobusEndpoint endpoint) { + // TODO Auto-generated method stub + } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -324,6 +339,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a // Basic // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); + logger.info("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -333,6 +349,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a connection.setRequestProperty("Accept", "application/json"); logger.info(jsonString); connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); wr.write(jsonString); wr.flush(); @@ -1318,7 +1335,7 @@ GlobusEndpoint 
getGlobusEndpoint(DvObject dvObject) { AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - +logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; From 6f464bc4697e5b1aee280d4d963c644ca7a80dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 4 Oct 2023 17:58:06 +0200 Subject: [PATCH 056/546] Revert print email on modal --- src/main/webapp/roles-assign.xhtml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml index 4b355c74d5c..93b9862c55d 100644 --- a/src/main/webapp/roles-assign.xhtml +++ b/src/main/webapp/roles-assign.xhtml @@ -32,7 +32,6 @@ var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter"> - From 4c67f2a636699d51589fa815511ce4e1b3dc9d1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 6 Oct 2023 12:13:19 -0400 Subject: [PATCH 057/546] remove inefficient bucket check --- .../iq/dataverse/dataaccess/S3AccessIO.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..22216ee5c2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -108,14 +108,13 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); } - //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. - try { - if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { - throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); - } - } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); - } + + // FWIW: There used to be a check here to see if the bucket exists. + // It was very redundant (checking every time we access any file) and didn't do + // much but potentially make the failure (in the unlikely case a bucket doesn't + // exist/just disappeared) happen slightly earlier (here versus at the first + // file/metadata access). 
+ } catch (Exception e) { throw new AmazonClientException( "Cannot instantiate a S3 client; check your AWS credentials and region", From 90dfa42c9090ce9e4cf9dab1e8ed57776137a077 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 11 Oct 2023 16:41:38 -0400 Subject: [PATCH 058/546] Redesigned provider mechanism --- .../iq/dataverse/dataaccess/S3AccessIO.java | 71 ++++++++++++++----- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 22216ee5c2b..ee04bbcb853 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; @@ -57,9 +58,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.function.Predicate; import java.util.logging.Logger; @@ -1180,29 +1183,59 @@ private static AmazonS3 getClient(String driverId) { // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + /** Configure credentials for the S3 client. There are multiple mechanisms available. + * Role-based/instance credentials are globally defined while the other mechanisms (profile, static) + * are defined per store. The logic below assures that + * * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials. + * * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials, + * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and + * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default"); - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.files." 
+ driverId + ".secret-key", String.class).orElse("") - )); - - //Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + ArrayList providers = new ArrayList<>(); + + String s3profile = System.getProperty("dataverse.files." + driverId + ".profile"); + boolean allowInstanceCredentials = true; + // Assume that instance credentials should not be used if the profile is + // actually set for this store or if static creds are provided (below). + if (s3profile != null) { + allowInstanceCredentials = false; + } + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env vars or system properties to provide these, but + // use the secrets config source provided by Payara. + Optional accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class); + Optional secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class); + if (accessKey.isPresent() && secretKey.isPresent()) { + allowInstanceCredentials = false; + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + accessKey.orElse(""), + secretKey.orElse(""))); + providers.add(staticCredentials); + } else if (s3profile == null) { + //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). + s3profile = "default"; + } + if (s3profile != null) { + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); + providers.add(profileCredentials); + } + + if (allowInstanceCredentials) { + // Add role-based provider as in the default provider chain + InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + providers.add(instanceCredentials); + } // Add all providers to chain - the first working provider will be used - // (role-based is first in the default cred provider chain, so we're just + // (role-based is first in the default cred provider chain (if no profile or + // static creds are explicitly set for the store), so we're just // reproducing that, then profile, then static credentials as the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials); + + // As the order is the reverse of how we added providers, we reverse the list here + Collections.reverse(providers); + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers); s3CB.setCredentials(providerChain); - + // let's build the client :-) AmazonS3 client = s3CB.build(); driverClientMap.put(driverId, client); From dcca52566958fba3f58698766f9696723fcebfc0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 Oct 2023 09:28:42 -0400 Subject: [PATCH 059/546] Good cleanup Co-authored-by: Oliver Bertuch --- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index ee04bbcb853..a66686ac648 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1209,22 +1209,20 @@ private static AmazonS3 getClient(String driverId) { 
allowInstanceCredentials = false; AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( new BasicAWSCredentials( - accessKey.orElse(""), - secretKey.orElse(""))); + accessKey.get(), + secretKey.get())); providers.add(staticCredentials); } else if (s3profile == null) { //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). s3profile = "default"; } if (s3profile != null) { - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - providers.add(profileCredentials); + providers.add(new ProfileCredentialsProvider(s3profile)); } if (allowInstanceCredentials) { // Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); - providers.add(instanceCredentials); + providers.add(InstanceProfileCredentialsProvider.getInstance()); } // Add all providers to chain - the first working provider will be used // (role-based is first in the default cred provider chain (if no profile or From 4ad95697405512c16ec42b1d242ce620aec2436a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:32:13 -0400 Subject: [PATCH 060/546] partial changes for permission mgmt, etc. --- .../harvard/iq/dataverse/api/Datasets.java | 33 ++- .../dataverse/globus/GlobusServiceBean.java | 218 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 3 files changed, 155 insertions(+), 97 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 25839544ce9..d3ea1b80696 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3363,6 +3363,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } //------------------------------------ // (2b) Make sure dataset does not have package file @@ -3396,7 +3405,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); return ok("Async call to Globus Upload started "); @@ -3414,9 +3423,10 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, */ @POST @AuthRequired - @Path("{id}/allowGlobusUpload") + @Path("{id}/requestGlobusTransferPaths") @Consumes(MediaType.APPLICATION_JSON) - public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + @Produces(MediaType.APPLICATION_JSON) + public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody ) throws IOException, ExecutionException, InterruptedException { @@ -3454,15 +3464,18 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - + 
try { JsonObject params = JsonUtil.getJsonObject(jsonBody); String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if(numberOfPaths <=0) { + return badRequest("numberOfFiles must be positive"); + } - // Async Call - int status = globusService.givePermission("identity", principal, "rw", dataset); - switch (status) { + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { case 201: - return ok("Permission Granted"); + return ok(response.getJsonArray("paths")); case 400: return badRequest("Unable to grant permission"); case 409: @@ -3470,6 +3483,10 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar default: return error(null, "Unexpected error when granting permission"); } + } catch (NullPointerException|ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } else { return forbidden("User doesn't have permission to upload to this dataset"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ad20b90971b..49572519696 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,7 +1,11 @@ package edu.harvard.iq.dataverse.globus; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; +import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; + import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -15,7 +19,9 @@ import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -29,6 +35,8 @@ import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -48,6 +56,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -107,8 +116,10 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { + String principalType="identity"; + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); @@ -118,20 +129,22 @@ private ArrayList checkPermissions(GlobusEndpoint endpoint, String princ for (int i 
= 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) - && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { - ids.add(pr.getId()); + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) + &&pr.getPermissions().equals(permissions)) { + return pr.getId(); } else { - logger.info(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); + logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } } - - return ids; + return null; } -/* + + /* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -165,47 +178,71 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin } } */ - public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { - - if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - - globusLogger.info("Start deleting permissions."); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - - URL url = new URL( - "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + +/** Call to delete a globus rule related to the specified dataset. 
+ * + * @param ruleId - Globus rule id - assumed to be associated with the dataset's file path (should not be called with a user specified rule id w/o further checking) + * @param datasetId - the id of the dataset associated with the rule + * @param globusLogger - a separate logger instance, may be null + */ +public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { + + if (ruleId.length() > 0) { + if (dataset != null) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + if (endpoint != null) { + String accessToken = endpoint.getClientToken(); + if (globusLogger != null) { + globusLogger.info("Start deleting permissions."); + } + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null); + if (result.status != 200) { + if (globusLogger != null) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + // When removed due to a cache ejection, we don't have a globusLogger + logger.warning("Cannot delete access rule " + ruleId); + } + } else { + if (globusLogger != null) { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } + } catch (MalformedURLException ex) { + logger.log(Level.WARNING, + "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex); + } } } - } +} - public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { + public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - ArrayList rules = checkPermissions(endpoint, principalType, principal); + String principalType= "identity"; Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); permissions.setPath(endpoint.getBasePath() + "/"); - permissions.setPermissions(perm); + permissions.setPermissions("rw"); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; - if (rules.size() == 0) { logger.info("Start creating the rule"); + JsonObjectBuilder response = Json.createObjectBuilder(); + + try { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); + response.add("status", result.status); switch (result.status) { case 400: @@ -215,35 +252,50 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); break; case 201: - JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); - if (response != null && response.containsKey("access_id")) { - permissions.setId(response.getString("access_id")); - monitorTemporaryPermissions(permissions, endpoint); + JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse); + if (globusResponse != null && globusResponse.containsKey("access_id")) { + permissions.setId(globusResponse.getString("access_id")); + monitorTemporaryPermissions(permissions.getId(), dataset.getId()); logger.info("Access rule " + permissions.getId() + " was created successfully"); + JsonArrayBuilder pathArray = Json.createArrayBuilder(); + for(int i=0;i rulesCache = Caffeine.newBuilder() + 
.expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .removalListener((ruleId, datasetId, cause) -> { + //Delete rules that expire + Dataset dataset = datasetSvc.find(datasetId); + deletePermission((String) ruleId, dataset, null); + }) + + .build(); + + + private void monitorTemporaryPermissions(String ruleId, long datasetId) { + rulesCache.put(ruleId, datasetId); } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -468,6 +520,7 @@ private MakeRequestResponse findDirectory(String directory, String clientToken, return result; } + /* public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { @@ -478,20 +531,6 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) if (status.status == 200) { - /* - * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: - * files) { if (!file.getName().contains("cached") && - * !file.getName().contains(".thumb")) { int perStatus = - * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory - * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status " - * + perStatus + " for " + file.getName()); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached for " + - * file.getName()); } else if (perStatus == 400) { - * logger.info("No file in Globus " + file.getName()); } else if (perStatus != - * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } - */ - int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { @@ -512,7 +551,8 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) return true; } - +*/ + // Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); @@ -572,7 +612,7 @@ public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; @@ -606,33 +646,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // ToDo - use DataAccess methods? //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + + logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); - Thread.sleep(5000); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); - } - logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); - - String taskIdentifier = jsonObject.getString("taskIdentifier"); + String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = ""; - try { - ruleId = jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object"); - } + String ruleId = null; + Thread.sleep(5000); + // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermission(ruleId, globusLogger); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + + ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { + Long datasetId = rulesCache.getIfPresent(ruleId); + if(datasetId!=null) { + + //Will delete rule + rulesCache.invalidate(ruleId); + } else { + //The cache already expired this rule, in which case it's delay not long enough, or we have some other problem + logger.warning("Rule " + ruleId + " not found in rulesCache"); + deletePermission(ruleId, dataset, globusLogger); + } } // If success, switch to an EditInProgress lock - do this before removing the @@ -674,7 +714,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + JsonArray filesJsonArray = jsonData.getJsonArray("files"); if (filesJsonArray != null) { String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); @@ -905,7 +945,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermission(ruleId, globusLogger); + deletePermission(ruleId, dataset, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index b4807372b69..f8abe505dca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -48,6 +48,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), + GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), From 30395309689949a3fc633e3be5fa4c30cc1f27cd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:33:02 -0400 Subject: [PATCH 061/546] check driver type not id --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index f4cc7d40120..3bc83538679 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -606,7 +606,7 @@ public static String getDriverPrefix(String driverId) { } public static boolean isDirectUploadEnabled(String driverId) { - return (DataAccess.S3.equals(driverId) && Boolean.parseBoolean(System.getProperty("dataverse.files." 
+ DataAccess.S3 + ".upload-redirect"))) || + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } From 48144a24cb200e285b5419ab29865293eac17e54 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 16 Oct 2023 17:00:51 -0400 Subject: [PATCH 062/546] adding extra logic to skip things like facets and highlights in searches, unless specifically requested. (#9635) --- .../search/SearchIncludeFragment.java | 91 ++++-- .../dataverse/search/SearchServiceBean.java | 308 +++++++++++------- 2 files changed, 249 insertions(+), 150 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..1e42958fe4e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -120,7 +120,6 @@ public class SearchIncludeFragment implements java.io.Serializable { private Long facetCountDatasets = 0L; private Long facetCountFiles = 0L; Map previewCountbyType = new HashMap<>(); - private SolrQueryResponse solrQueryResponseAllTypes; private String sortField; private SortOrder sortOrder; private String currentSort; @@ -132,6 +131,7 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; + private boolean solrIsOverloaded = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -279,6 +279,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; + SolrQueryResponse solrQueryResponseSecondPass = null; List filterQueriesFinal = new ArrayList<>(); @@ -311,18 +312,11 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - List filterQueriesFinalAllTypes = new ArrayList<>(); - String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); - selectedTypesHumanReadable = combine(arr, " OR "); - if (!selectedTypesHumanReadable.isEmpty()) { - typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + filterQueriesFinal.addAll(filterQueries); - filterQueriesFinalAllTypes.addAll(filterQueriesFinal); - String allTypesFilterQuery = SearchFields.TYPE + ":(dataverses OR datasets OR files)"; - filterQueriesFinalAllTypes.add(allTypesFilterQuery); + filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -363,10 +357,60 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) 
// To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); - if (solrQueryResponse.hasError()){ - logger.info(solrQueryResponse.getError()); - setSolrErrorEncountered(true); + // Sure, but we should not waste resources here. We will try to save + // solr some extra work and a) only run this second query IF there is + // one or more unselected type facets; and b) drop all the extra + // parameters from this second query - such as facets and highlights - + // that we do not actually need for the purposes of finding these + // extra numbers. -- L.A. 10/16/2023 + + // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), -1L); + + + // This will populate the type facet counts for the types that are + // currently selected on the collection page: + for (FacetCategory facetCategory : solrQueryResponse.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + + if (selectedTypesList.size() < 3) { + // If some types are NOT currently selected, we will need to + // run another query to obtain the numbers of the unselected types: + + List filterQueriesFinalSecondPass = new ArrayList<>(); + filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + + List selectedTypesListSecondPass = new ArrayList<>(); + + for (String dvObjectType : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectType) == -1) { + selectedTypesListSecondPass.add(dvObjectType); + } + } + + String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + + if (solrQueryResponseSecondPass != null) { + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); + if (solrQueryResponseSecondPass.hasError()) { + logger.info(solrQueryResponse.getError()); + setSolrErrorEncountered(true); + } + + // And now populate the remaining type facets: + for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + } } } catch (SearchException ex) { @@ -446,17 +490,6 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 - previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); - if (solrQueryResponseAllTypes != null) { - for (FacetCategory facetCategory : solrQueryResponseAllTypes.getTypeFacetCategories()) { - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - 
previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); - } - } - } setDisplayCardValues(); @@ -1020,6 +1053,14 @@ public boolean isSolrIsDown() { public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } + + public boolean isSolrOverloaded() { + return solrIsOverloaded; + } + + public void setSolrIsOverloaded(boolean solrIsOverloaded) { + this.solrIsOverloaded = solrIsOverloaded; + } public boolean isRootDv() { return rootDv; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 44976d232c2..aa2948eb8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -101,7 +101,7 @@ public class SearchServiceBean { public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -122,6 +122,41 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, + String query, + List filterQueries, + String sortField, + String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius) throws SearchException { + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null, true, true); + } + + /** + * @param dataverseRequest + * @param dataverses + * @param query + * @param filterQueries + * @param sortField + * @param sortOrder + * @param paginationStart + * @param onlyDatatRelatedToMe + * @param numResultsPerPage + * @param retrieveEntities - look up dvobject entities with .find() (potentially expensive!) + * @param geoPoint e.g. "35,15" + * @param geoRadius e.g. "5" + * @param addFacets boolean + * @param addHighlights boolean * @return * @throws SearchException */ @@ -136,7 +171,9 @@ public SolrQueryResponse search( int numResultsPerPage, boolean retrieveEntities, String geoPoint, - String geoRadius + String geoRadius, + boolean addFacets, + boolean addHighlights ) throws SearchException { if (paginationStart < 0) { @@ -157,56 +194,62 @@ public SolrQueryResponse search( // solrQuery.setSort(sortClause); // } // solrQuery.setSort(sortClause); - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); + + List datasetFields = datasetFieldService.findAllOrderedById(); Map solrFieldsToHightlightOnMap = new HashMap<>(); - // TODO: Do not hard code "Name" etc as English here. 
- solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are not - * being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. 
+ solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - List datasetFields = datasetFieldService.findAllOrderedById(); - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); + /** + * @todo: show highlight on file card? 
+ * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } } + solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); @@ -214,6 +257,8 @@ public SolrQueryResponse search( * @todo: do we need facet.query? */ solrQuery.setParam("facet.query", "*"); + solrQuery.addFacetField(SearchFields.TYPE); // this one is always performed + for (String filterQuery : filterQueries) { solrQuery.addFilterQuery(filterQuery); } @@ -223,70 +268,73 @@ public SolrQueryResponse search( // See https://solr.apache.org/guide/8_11/spatial-search.html#bbox solrQuery.addFilterQuery("{!bbox sfield=" + SearchFields.GEOLOCATION + "}"); } + + List metadataBlockFacets = new LinkedList<>(); - // ----------------------------------- - // Facets to Retrieve - // ----------------------------------- - solrQuery.addFacetField(SearchFields.METADATA_TYPES); -// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); -// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); - solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); - solrQuery.addFacetField(SearchFields.METADATA_SOURCE); -// solrQuery.addFacetField(SearchFields.AFFILIATION); - solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); -// solrQuery.addFacetField(SearchFields.CATEGORY); -// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); -// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); -// solrQuery.addFacetField(SearchFields.KEYWORD); - /** - * @todo when a new method on datasetFieldService is available - * (retrieveFacetsByDataverse?) only show the facets that the dataverse - * in question wants to show (and in the right order): - * https://redmine.hmdc.harvard.edu/issues/3490 - * - * also, findAll only returns advancedSearchField = true... we should - * probably introduce the "isFacetable" boolean rather than caring about - * if advancedSearchField is true or false - * - */ + if (addFacets) { + // ----------------------------------- + // Facets to Retrieve + // ----------------------------------- + solrQuery.addFacetField(SearchFields.METADATA_TYPES); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.METADATA_SOURCE); + solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the + * dataverse in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... 
we + * should probably introduce the "isFacetable" boolean rather than + * caring about if advancedSearchField is true or false + * + */ - List metadataBlockFacets = new LinkedList<>(); + if (dataverses != null) { + for (Dataverse dataverse : dataverses) { + if (dataverse != null) { + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + // Get all metadata block facets configured to be displayed + metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); + } + } + } + + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last... + */ + solrQuery.addFacetField(SearchFields.FILE_TAG); + if (!systemConfig.isPublicInstall()) { + solrQuery.addFacetField(SearchFields.ACCESS); + } + } + + //I'm not sure if just adding null here is good for hte permissions system... i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { // ----------------------------------- // PERMISSION FILTER QUERY // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } - if (dataverse != null) { - for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { - DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); - solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); - } - // Get all metadata block facets configured to be displayed - metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); - } } } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } } - solrQuery.addFacetField(SearchFields.FILE_TYPE); - /** - * @todo: hide the extra line this shows in the GUI... at least it's - * last... - */ - solrQuery.addFacetField(SearchFields.TYPE); - solrQuery.addFacetField(SearchFields.FILE_TAG); - if (!systemConfig.isPublicInstall()) { - solrQuery.addFacetField(SearchFields.ACCESS); - } + /** * @todo: do sanity checking... throw error if negative */ @@ -416,34 +464,44 @@ public SolrQueryResponse search( Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); List matchedFields = new ArrayList<>(); - List highlights = new ArrayList<>(); - Map highlightsMap = new HashMap<>(); - Map> highlightsMap2 = new HashMap<>(); - Map highlightsMap3 = new HashMap<>(); - if (queryResponse.getHighlighting().get(id) != null) { - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String field = entry.getKey(); - String displayName = entry.getValue(); - - List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); - if (highlightSnippets != null) { - matchedFields.add(field); - /** - * @todo only SolrField.SolrType.STRING? that's not - * right... 
knit the SolrField object more into the - * highlighting stuff - */ - SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); - Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); - highlights.add(highlight); - highlightsMap.put(solrField, highlight); - highlightsMap2.put(solrField, highlightSnippets); - highlightsMap3.put(field, highlight); + + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + if (addHighlights) { + List highlights = new ArrayList<>(); + Map highlightsMap = new HashMap<>(); + Map> highlightsMap2 = new HashMap<>(); + Map highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } } + } + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); } - SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + /** * @todo put all this in the constructor? */ @@ -470,9 +528,7 @@ public SolrQueryResponse search( solrSearchResult.setNameSort(nameSort); solrSearchResult.setReleaseOrCreateDate(release_or_create_date); solrSearchResult.setMatchedFields(matchedFields); - solrSearchResult.setHighlightsAsList(highlights); - solrSearchResult.setHighlightsMap(highlightsMap); - solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map parent = new HashMap<>(); String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); solrSearchResult.setDescriptionNoSnippet(description); @@ -863,7 +919,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -922,9 +978,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ AuthenticatedUser au = (AuthenticatedUser) user; - // Logged in user, has publication status facet - // - solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + if (addFacets) { + // Logged in user, has publication status facet + // + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + } // ---------------------------------------------------- // (3) Is this a Super User? 
From 6307292d3858bd62144e313de1b5574b55b4fb36 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 13:07:14 -0400 Subject: [PATCH 063/546] more fixes/cleanup #9635 --- .../search/SearchIncludeFragment.java | 27 ++-- .../dataverse/search/SearchServiceBean.java | 127 ++++++++++-------- 2 files changed, 90 insertions(+), 64 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1e42958fe4e..958ac0151c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -308,15 +308,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } + filterQueriesFinal.addAll(filterQueries); + + /** + * Add type queries, for the types (Dataverses, Datasets, Datafiles) + * currently selected: + */ selectedTypesList = new ArrayList<>(); String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - - - filterQueriesFinal.addAll(filterQueries); - + logger.info("selected types list size: "+selectedTypesList.size()); + String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); + selectedTypesHumanReadable = combine(arr, " OR "); + if (!selectedTypesHumanReadable.isEmpty()) { + typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; + } filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -383,7 +391,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run another query to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + filterQueriesFinalSecondPass.addAll(filterQueries); List selectedTypesListSecondPass = new ArrayList<>(); @@ -393,12 +401,13 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + if (solrQueryResponseSecondPass != null) { - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponseSecondPass.hasError()) { logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); @@ -410,6 +419,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } + } else { + logger.warning("null solr response from the second pass type query"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index aa2948eb8cb..d3ff7e42d15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -195,64 +195,11 @@ public SolrQueryResponse search( // } // solrQuery.setSort(sortClause); - List datasetFields = datasetFieldService.findAllOrderedById(); - Map solrFieldsToHightlightOnMap = new HashMap<>(); - if (addHighlights) { - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); - - // TODO: Do not hard code "Name" etc as English here. - solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are - * not being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); - } - } solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); + /** * @todo: do we need facet.query? 
*/ @@ -315,7 +262,61 @@ public SolrQueryResponse search( } } - + List datasetFields = datasetFieldService.findAllOrderedById(); + Map solrFieldsToHightlightOnMap = new HashMap<>(); + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? + * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + } + //I'm not sure if just adding null here is good for hte permissions system... 
i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { @@ -370,7 +371,7 @@ public SolrQueryResponse search( // solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); // solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); solrQuery.setRows(numResultsPerPage); - logger.fine("Solr query:" + solrQuery); + logger.info("Solr query:" + solrQuery); // ----------------------------------- // Make the solr query @@ -378,8 +379,12 @@ public SolrQueryResponse search( QueryResponse queryResponse = null; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); + } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); + + logger.info("message from solr exception: "+messageFromSolr); + String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -393,6 +398,12 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error" - could be anything else too - ? + + + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); @@ -408,6 +419,10 @@ public SolrQueryResponse search( } catch (SolrServerException | IOException ex) { throw new SearchException("Internal Dataverse Search Engine Error", ex); } + + int statusCode = queryResponse.getStatus(); + + logger.info("status code of the query response: "+statusCode); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); From 74eb7c551d209c9e460cbaea5572004b0fcad0bc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 16:09:32 -0400 Subject: [PATCH 064/546] more fixes (#9635) --- .../search/SearchIncludeFragment.java | 24 +++++++++++++++---- .../dataverse/search/SearchServiceBean.java | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 958ac0151c6..177186fce49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -395,9 +395,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused List selectedTypesListSecondPass = new ArrayList<>(); - for (String dvObjectType : previewCountbyType.keySet()) { - if (previewCountbyType.get(dvObjectType) == -1) { - selectedTypesListSecondPass.add(dvObjectType); + // @todo: simplify this! 
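[Editor's sketch, not part of this patch.] One way the @todo above could go is a one-time reverse map from the localized facet labels back to the canonical type names, used by the loop that follows; this assumes the same three bundle keys the loop compares against and requires java.util.Map.

    // Illustrative simplification only (not committed code): build the
    // label-to-type lookup once, then translate in a single pass.
    Map<String, String> labelToType = Map.of(
            BundleUtil.getStringFromBundle("dataverses"), "dataverses",
            BundleUtil.getStringFromBundle("datasets"), "datasets",
            BundleUtil.getStringFromBundle("files"), "files");

    for (String label : previewCountbyType.keySet()) {
        if (previewCountbyType.get(label) == -1 && labelToType.containsKey(label)) {
            selectedTypesListSecondPass.add(labelToType.get(label));
        }
    }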
+ for (String dvObjectTypeLabel : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectTypeLabel) == -1) { + String dvObjectType = null; + + if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("dataverses"))) { + dvObjectType = "dataverses"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("datasets"))) { + dvObjectType = "datasets"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("files"))) { + dvObjectType = "files"; + } + + if (dvObjectType != null) { + logger.info("adding object type to the second pass query: "+dvObjectType); + selectedTypesListSecondPass.add(dvObjectType); + } } } @@ -409,13 +423,15 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused if (solrQueryResponseSecondPass != null) { if (solrQueryResponseSecondPass.hasError()) { - logger.info(solrQueryResponse.getError()); + logger.info(solrQueryResponseSecondPass.getError()); setSolrErrorEncountered(true); } // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index d3ff7e42d15..18cdbaa6994 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -423,6 +423,7 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); + ///logger.info("number of hits: "+queryResponse._size()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -823,6 +824,7 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { + logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From f1e37ae0ff01e1fe0030202be1883f823bb8d080 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 17:26:54 -0400 Subject: [PATCH 065/546] finally working as it should; much simplified/way less expensive second pass query sent in order to populate the unchecked type count facets. 
(#9635) --- .../iq/dataverse/search/SearchIncludeFragment.java | 4 +--- .../iq/dataverse/search/SearchServiceBean.java | 13 +++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 177186fce49..47a5621c3d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -418,7 +418,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); if (solrQueryResponseSecondPass != null) { @@ -429,9 +429,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { - logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 18cdbaa6994..be3330080c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -165,7 +165,8 @@ public SolrQueryResponse search( List dataverses, String query, List filterQueries, - String sortField, String sortOrder, + String sortField, + String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, @@ -189,7 +190,11 @@ public SolrQueryResponse search( // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); // if (query.equals("*") || query.equals("*:*")) { // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); - solrQuery.setSort(new SortClause(sortField, sortOrder)); + if (sortField != null) { + // is it ok not to specify any sort? - there are cases where we + // don't care, and it must cost some extra cycles -- L.A. 
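[Editor's sketch, for context on the null-sort change just above.] The second-pass query only needs the per-type facet counts, so it can skip sorting and highlighting and request few or no result rows. In plain SolrJ, outside of Dataverse and with an illustrative field name, such a counts-only query looks roughly like this:

    import java.io.IOException;
    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.response.FacetField;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class TypeCountSketch {
        // Print only the facet counts for the object-type field.
        // "dvObjectType" is an assumed field name for illustration.
        public static void printTypeCounts(SolrClient client, String queryString)
                throws SolrServerException, IOException {
            SolrQuery countsOnly = new SolrQuery(queryString);
            countsOnly.setRows(0);            // no documents needed, so no sort either
            countsOnly.setFacet(true);
            countsOnly.addFacetField("dvObjectType");
            countsOnly.addFilterQuery("dvObjectType:(dataverses OR datasets OR files)");
            QueryResponse response = client.query(countsOnly);
            for (FacetField.Count count : response.getFacetField("dvObjectType").getValues()) {
                System.out.println(count.getName() + ": " + count.getCount());
            }
        }
    }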
+ solrQuery.setSort(new SortClause(sortField, sortOrder)); + } // } else { // solrQuery.setSort(sortClause); // } @@ -423,7 +428,8 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); - ///logger.info("number of hits: "+queryResponse._size()); + logger.info("_size from query response: "+queryResponse._size()); + logger.info("qtime: "+queryResponse.getQTime()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -824,7 +830,6 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { - logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From c1a19299e547fbc47322dafde74bc75d2e138d9c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 18 Oct 2023 13:48:47 -0400 Subject: [PATCH 066/546] a stub for interecepting a "circuit breaker" 503 from the server (#9635) --- .../dataverse/search/SearchServiceBean.java | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index be3330080c4..1b92c2a4a46 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,13 +382,35 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; + boolean solrTemporarilyUnavailable = false; + try { queryResponse = solrClientService.getSolrClient().query(solrQuery); } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); - logger.info("message from solr exception: "+messageFromSolr); + logger.info("message from tye solr exception: "+messageFromSolr); + logger.info("code from the solr exception: "+ex.code()); + + if (queryResponse != null) { + logger.info("return code: "+queryResponse.getStatus()); + } + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error", as the code below implies - could be + // something else too - ? + + // Specifically, we now rely on the Solr "circuit breaker" mechanism + // to start dropping requests with 503, when the service is + // overwhelmed with requests load (with the assumption that this is + // a transient condition): + + if (ex.code() == 503) { + solrTemporarilyUnavailable = true; + // actual logic for communicating this state back to the local + // client code TBD (@todo) + } String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; @@ -403,12 +425,7 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error - - // We probably shouldn't be assuming that this is necessarily a - // "search syntax error" - could be anything else too - ? 
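[Editor's sketch.] The @todo above leaves open how the 503 "temporarily unavailable" state reaches the calling page. One hypothetical shape, shown only for illustration, is a flag on the response that the caller checks before treating the outcome as an error; the accessor and setter names below are invented, not existing Dataverse methods.

    // Hypothetical caller-side handling; isSolrTemporarilyUnavailable() and
    // setSearchTemporarilyUnavailable() are invented names for this sketch.
    if (solrQueryResponse.isSolrTemporarilyUnavailable()) {
        // transient overload (Solr circuit breaker returned 503):
        // let the UI show a "search is busy, please retry shortly" notice
        setSearchTemporarilyUnavailable(true);
    } else if (solrQueryResponse.hasError()) {
        // genuine query problem, e.g. a syntax error
        setSolrErrorEncountered(true);
    }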
- - - + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); From ecbb020ed7da390c378fb76f08c9c5fb72677189 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:12 -0400 Subject: [PATCH 067/546] add/standardize retrieveSizeFromMedia call --- .../iq/dataverse/dataaccess/FileAccessIO.java | 33 +- .../dataverse/dataaccess/InputStreamIO.java | 5 + .../dataaccess/RemoteOverlayAccessIO.java | 14 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 21 +- .../iq/dataverse/dataaccess/StorageIO.java | 379 +++++++++--------- .../dataverse/dataaccess/SwiftAccessIO.java | 5 + 6 files changed, 241 insertions(+), 216 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d95df1567bd..3e6c802c526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.function.Predicate; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; // Dataverse imports: @@ -115,7 +113,7 @@ public void open (DataAccessOption... options) throws IOException { this.setInputStream(fin); setChannel(fin.getChannel()); - this.setSize(getLocalFileSize()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -506,21 +504,6 @@ public void delete() throws IOException { // Auxilary helper methods, filesystem access-specific: - private long getLocalFileSize () { - long fileSize = -1; - - try { - File testFile = getFileSystemPath().toFile(); - if (testFile != null) { - fileSize = testFile.length(); - } - return fileSize; - } catch (IOException ex) { - return -1; - } - - } - public FileInputStream openLocalFileAsInputStream () { FileInputStream in; @@ -742,4 +725,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE return toDelete; } + @Override + public long retrieveSizeFromMedia() { + long fileSize = -1; + try { + File testFile = getFileSystemPath().toFile(); + if (testFile != null) { + fileSize = testFile.length(); + } + return fileSize; + } catch (IOException ex) { + return -1; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index be6f9df0254..de392b74cca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } + @Override + public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index a9653f2ab68..9c1f5ba23aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -64,8 +64,6 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - - String globusAccessToken = null; protected StorageIO baseStore = null; protected String path = null; @@ -155,7 +153,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(retrieveSize()); + this.setSize(retrieveSizeFromMedia()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -183,7 +181,8 @@ public void open(DataAccessOption... options) throws IOException { } } - long retrieveSize() { + @Override + public long retrieveSizeFromMedia() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -383,7 +382,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (retrieveSize() != -1); + return (retrieveSizeFromMedia() != -1); } @Override @@ -502,8 +501,9 @@ protected void configureStores(DataAccessRequest req, String driverId, String st if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; switch (baseDriverType) { case DataAccess.S3: diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..b0f9f0ffb05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -207,14 +207,7 @@ public void open(DataAccessOption... 
options) throws IOException { if (isReadAccess) { - key = getMainFileKey(); - ObjectMetadata objectMetadata = null; - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - } catch (SdkClientException sce) { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - this.setSize(objectMetadata.getContentLength()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -1385,4 +1378,16 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + key = getMainFileKey(); + ObjectMetadata objectMetadata = null; + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + } catch (SdkClientException sce) { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + return objectMetadata.getContentLength(); + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 3bc83538679..f3c2ef5f513 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -20,7 +20,6 @@ package edu.harvard.iq.dataverse.dataaccess; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; @@ -43,7 +42,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; - /** * * @author Leonid Andreev @@ -55,15 +53,15 @@ public abstract class StorageIO { public StorageIO() { } - + public StorageIO(String storageLocation, String driverId) { - this.driverId=driverId; + this.driverId = driverId; } public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; - this.driverId=driverId; + this.driverId = driverId; if (this.req == null) { this.req = new DataAccessRequest(); } @@ -72,18 +70,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { } } - - // Abstract methods to be implemented by the storage drivers: public abstract void open(DataAccessOption... option) throws IOException; protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. - //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + // A public store is one in which files may be accessible outside Dataverse and + // therefore accessible without regard to Dataverse's access controls related to + // restriction and embargoes. + // Currently, this is just used to warn users at upload time rather than disable + // restriction/embargo. static protected Map driverPublicAccessMap = new HashMap(); - + public boolean canRead() { return isReadAccess; } @@ -94,115 +93,118 @@ public boolean canWrite() { public abstract String getStorageLocation() throws IOException; - // This method will return a Path, if the storage method is a - // local filesystem. Otherwise should throw an IOException. + // This method will return a Path, if the storage method is a + // local filesystem. Otherwise should throw an IOException. 
public abstract Path getFileSystemPath() throws IOException; - - public abstract boolean exists() throws IOException; - + + public abstract boolean exists() throws IOException; + public abstract void delete() throws IOException; - + // this method for copies a local Path (for ex., a // temp file, into this DataAccess location): public abstract void savePath(Path fileSystemPath) throws IOException; - + // same, for an InputStream: /** - * This method copies a local InputStream into this DataAccess location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess location. Note + * that the S3 driver implementation of this abstract method is problematic, + * because S3 cannot save an object of an unknown length. This effectively + * nullifies any benefits of streaming; as we cannot start saving until we have + * read the entire stream. One way of solving this would be to buffer the entire + * stream as byte[], in memory, then save it... Which of course would be limited + * by the amount of memory available, and thus would not work for streams larger + * than that. So we have eventually decided to save save the stream to a temp + * file, then save to S3. This is slower, but guaranteed to work on any size + * stream. An alternative we may want to consider is to not implement this + * method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ + */ public abstract void saveInputStream(InputStream inputStream) throws IOException; + public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException; - + // Auxiliary File Management: (new as of 4.0.2!) - + // An "auxiliary object" is an abstraction of the traditional DVN/Dataverse - // mechanism of storing extra files related to the man StudyFile/DataFile - - // such as "saved original" and cached format conversions for tabular files, - // thumbnails for images, etc. - in physical files with the same file - // name but various reserved extensions. 
- - //This function retrieves auxiliary files related to datasets, and returns them as inputstream - public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ; - + // mechanism of storing extra files related to the man StudyFile/DataFile - + // such as "saved original" and cached format conversions for tabular files, + // thumbnails for images, etc. - in physical files with the same file + // name but various reserved extensions. + + // This function retrieves auxiliary files related to datasets, and returns them + // as inputstream + public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException; + public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException; - - public abstract long getAuxObjectSize(String auxItemTag) throws IOException; - - public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; - - public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; - - public abstract void backupAsAux(String auxItemTag) throws IOException; - - public abstract void revertBackupAsAux(String auxItemTag) throws IOException; - - // this method copies a local filesystem Path into this DataAccess Auxiliary location: + + public abstract long getAuxObjectSize(String auxItemTag) throws IOException; + + public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; + + public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; + + public abstract void backupAsAux(String auxItemTag) throws IOException; + + public abstract void revertBackupAsAux(String auxItemTag) throws IOException; + + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException; - + /** - * This method copies a local InputStream into this DataAccess Auxiliary location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess Auxiliary + * location. Note that the S3 driver implementation of this abstract method is + * problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. 
So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException; - - public abstract ListlistAuxObjects() throws IOException; - - public abstract void deleteAuxObject(String auxItemTag) throws IOException; - + */ + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; + + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) + throws IOException; + + public abstract List listAuxObjects() throws IOException; + + public abstract void deleteAuxObject(String auxItemTag) throws IOException; + public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; private InputStream in = null; - private OutputStream out; + private OutputStream out; protected Channel channel; protected DvObject dvObject; protected String driverId; - /*private int status;*/ + /* private int status; */ private long size; /** - * Where in the file to seek to when reading (default is zero bytes, the - * start of the file). + * Where in the file to seek to when reading (default is zero bytes, the start + * of the file). 
*/ private long offset; - + private String mimeType; private String fileName; private String varHeader; @@ -215,8 +217,8 @@ public boolean canWrite() { private String swiftContainerName; private boolean isLocalFile = false; - /*private boolean isRemoteAccess = false;*/ - /*private boolean isHttpAccess = false;*/ + /* private boolean isRemoteAccess = false; */ + /* private boolean isHttpAccess = false; */ private boolean noVarHeader = false; // For remote downloads: @@ -229,13 +231,14 @@ public boolean canWrite() { private String remoteUrl; protected String remoteStoreName = null; protected URL remoteStoreUrl = null; - + // For HTTP-based downloads: - /*private GetMethod method = null; - private Header[] responseHeaders;*/ + /* + * private GetMethod method = null; private Header[] responseHeaders; + */ // getters: - + public Channel getChannel() throws IOException { return channel; } @@ -255,16 +258,15 @@ public ReadableByteChannel getReadChannel() throws IOException { return (ReadableByteChannel) channel; } - - public DvObject getDvObject() - { + + public DvObject getDvObject() { return dvObject; } - + public DataFile getDataFile() { return (DataFile) dvObject; } - + public Dataset getDataset() { return (Dataset) dvObject; } @@ -277,9 +279,9 @@ public DataAccessRequest getRequest() { return req; } - /*public int getStatus() { - return status; - }*/ + /* + * public int getStatus() { return status; } + */ public long getSize() { return size; @@ -292,9 +294,9 @@ public long getOffset() { public InputStream getInputStream() throws IOException { return in; } - + public OutputStream getOutputStream() throws IOException { - return out; + return out; } public String getMimeType() { @@ -317,23 +319,23 @@ public String getRemoteUrl() { return remoteUrl; } - public String getTemporarySwiftUrl(){ + public String getTemporarySwiftUrl() { return temporarySwiftUrl; } - + public String getTempUrlExpiry() { return tempUrlExpiry; } - + public String getTempUrlSignature() { return tempUrlSignature; } - + public String getSwiftFileName() { return swiftFileName; } - public String getSwiftContainerName(){ + public String getSwiftContainerName() { return swiftContainerName; } @@ -344,34 +346,32 @@ public String getRemoteStoreName() { public URL getRemoteStoreUrl() { return remoteStoreUrl; } - - /*public GetMethod getHTTPMethod() { - return method; - } - public Header[] getResponseHeaders() { - return responseHeaders; - }*/ + /* + * public GetMethod getHTTPMethod() { return method; } + * + * public Header[] getResponseHeaders() { return responseHeaders; } + */ public boolean isLocalFile() { return isLocalFile; } - - // "Direct Access" StorageIO is used to access a physical storage - // location not associated with any dvObject. (For example, when we - // are deleting a physical file left behind by a DataFile that's - // already been deleted from the database). + + // "Direct Access" StorageIO is used to access a physical storage + // location not associated with any dvObject. (For example, when we + // are deleting a physical file left behind by a DataFile that's + // already been deleted from the database). 
public boolean isDirectAccess() { - return dvObject == null; + return dvObject == null; } - /*public boolean isRemoteAccess() { - return isRemoteAccess; - }*/ + /* + * public boolean isRemoteAccess() { return isRemoteAccess; } + */ - /*public boolean isHttpAccess() { - return isHttpAccess; - }*/ + /* + * public boolean isHttpAccess() { return isHttpAccess; } + */ public boolean isDownloadSupported() { return isDownloadSupported; @@ -398,9 +398,9 @@ public void setRequest(DataAccessRequest dar) { req = dar; } - /*public void setStatus(int s) { - status = s; - }*/ + /* + * public void setStatus(int s) { status = s; } + */ public void setSize(long s) { size = s; @@ -421,11 +421,11 @@ public void setOffset(long offset) throws IOException { public void setInputStream(InputStream is) { in = is; } - + public void setOutputStream(OutputStream os) { - out = os; - } - + out = os; + } + public void setChannel(Channel c) { channel = c; } @@ -450,45 +450,46 @@ public void setRemoteUrl(String u) { remoteUrl = u; } - public void setTemporarySwiftUrl(String u){ + public void setTemporarySwiftUrl(String u) { temporarySwiftUrl = u; } - - public void setTempUrlExpiry(Long u){ + + public void setTempUrlExpiry(Long u) { tempUrlExpiry = String.valueOf(u); } - + public void setSwiftFileName(String u) { swiftFileName = u; } - - public void setTempUrlSignature(String u){ + + public void setTempUrlSignature(String u) { tempUrlSignature = u; } - public void setSwiftContainerName(String u){ + public void setSwiftContainerName(String u) { swiftContainerName = u; } - /*public void setHTTPMethod(GetMethod hm) { - method = hm; - }*/ + /* + * public void setHTTPMethod(GetMethod hm) { method = hm; } + */ - /*public void setResponseHeaders(Header[] headers) { - responseHeaders = headers; - }*/ + /* + * public void setResponseHeaders(Header[] headers) { responseHeaders = headers; + * } + */ public void setIsLocalFile(boolean f) { isLocalFile = f; } - /*public void setIsRemoteAccess(boolean r) { - isRemoteAccess = r; - }*/ + /* + * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; } + */ - /*public void setIsHttpAccess(boolean h) { - isHttpAccess = h; - }*/ + /* + * public void setIsHttpAccess(boolean h) { isHttpAccess = h; } + */ public void setIsDownloadSupported(boolean d) { isDownloadSupported = d; @@ -506,12 +507,11 @@ public void setNoVarHeader(boolean nvh) { noVarHeader = nvh; } - // connection management methods: - /*public void releaseConnection() { - if (method != null) { - method.releaseConnection(); - } - }*/ + // connection management methods: + /* + * public void releaseConnection() { if (method != null) { + * method.releaseConnection(); } } + */ public void closeInputStream() { if (in != null) { @@ -528,7 +528,7 @@ public void closeInputStream() { } } } - + public String generateVariableHeader(List dvs) { String varHeader = null; @@ -571,14 +571,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE return false; } - public boolean isBelowIngestSizeLimit() { - long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1")); - if(limit>0 && getSize()>limit) { - return false; - } else { - return true; - } - } + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(System.getProperty("dataverse.files." 
+ this.driverId + ".ingestsizelimit", "-1")); + if (limit > 0 && getSize() > limit) { + return false; + } else { + return true; + } + } public boolean downloadRedirectEnabled() { return false; @@ -587,36 +587,38 @@ public boolean downloadRedirectEnabled() { public boolean downloadRedirectEnabled(String auxObjectTag) { return false; } - - public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } - public static boolean isPublicStore(String driverId) { - //Read once and cache - if(!driverPublicAccessMap.containsKey(driverId)) { - driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + // Read once and cache + if (!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, + Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); } return driverPublicAccessMap.get(driverId); } - + public static String getDriverPrefix(String driverId) { - return driverId+ DataAccess.SEPARATOR; + return driverId + DataAccess.SEPARATOR; } - + public static boolean isDirectUploadEnabled(String driverId) { - return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || - Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) + && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) + || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } - - //Check that storageIdentifier is consistent with store's config - //False will prevent direct uploads + + // Check that storageIdentifier is consistent with store's config + // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { return false; } - - //Utility to verify the standard UUID pattern for stored files. + + // Utility to verify the standard UUID pattern for stored files. protected static boolean usesStandardNamePattern(String identifier) { Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); @@ -626,4 +628,15 @@ protected static boolean usesStandardNamePattern(String identifier) { public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + /** + * A storage-type-specific mechanism for retrieving the size of a file. Intended + * primarily as a way to get the size before it has been recorded in the + * database, e.g. during direct/out-of-band transfers but could be useful to + * check the db values. 
+ * + * @return file size in bytes + * @throws IOException + */ + public abstract long retrieveSizeFromMedia() throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 6c84009de3e..0d1dab581fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -954,4 +954,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } } From 68ab3f3cb6399d4c73bff0bcc84d9687ab369351 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:58 -0400 Subject: [PATCH 068/546] typos, change hash notice --- .../iq/dataverse/globus/GlobusServiceBean.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 49572519696..8aa9915db58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -4,8 +4,6 @@ import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; -import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; - import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -21,7 +19,6 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; -import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -662,6 +659,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { Long datasetId = rulesCache.getIfPresent(ruleId); if(datasetId!=null) { @@ -1095,7 +1093,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // ToDo: what if the file doesnot exists in s3 + // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed do { @@ -1107,8 +1105,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("DataFile (fullPAth " + fullPath - + ") does not appear to be accessible withing Dataverse: "); + globusLogger.info("DataFile (fullPath " + fullPath + + ") does not appear to be accessible within Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); @@ -1119,7 +1117,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } while (count < 3); if (checksumVal.length() == 0) { - checksumVal = "NULL"; + checksumVal = "Not available in Dataverse"; } String mimeType = calculatemime(fileName); @@ -1384,4 +1382,5 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { private static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); } + } From d57b9f048490bcc2a38d8c2fc422e3797bad2fbc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:24 -0400 Subject: [PATCH 069/546] add getLocationFromStorage, add tests --- .../iq/dataverse/dataaccess/DataAccess.java | 34 +++++++++++++++---- .../dataverse/dataaccess/DataAccessTest.java | 20 +++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 8387f8110cf..a3345cb7a8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -153,12 +153,34 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains(SEPARATOR)) { - //It's a full location with a driverId, so strip and reapply the driver id - //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); - } - return location.substring(location.lastIndexOf('/')+1); + if (location.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return location.substring(0, location.indexOf(SEPARATOR) + 3) + + location.substring(location.lastIndexOf('/') + 1); + } + return location.substring(location.lastIndexOf('/') + 1); + } + + /** Changes storageidentifiers of the form + * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * and + * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * @param id + * @param dataset + * @return + */ + public static String getLocationFromStorageId(String id, Dataset dataset) { + String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + if (id.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return id.substring(0, id.indexOf(SEPARATOR) + 3) + path + + id.substring(id.lastIndexOf('/') + 1); + } + return path + id.substring(id.lastIndexOf('/') + 1); } public static String getDriverType(String driverId) { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java index 1ff914adff9..f7ce061fb24 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java @@ -59,4 +59,24 @@ void testCreateNewStorageIO_createsFileAccessIObyDefault() throws IOException { StorageIO storageIo = DataAccess.createNewStorageIO(dataset, "valid-tag"); assertTrue(storageIo.getClass().equals(FileAccessIO.class)); } + + @Test + void testGetLocationFromStorageId() { + Dataset d = new Dataset(); + d.setAuthority("10.5072"); + d.setIdentifier("FK2/ABCDEF"); + assertEquals("s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("s3://18b39722140-50eb7d3c5ece", 
d)); + assertEquals("10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("18b39722140-50eb7d3c5ece", d)); + + } + + @Test + void testGetStorageIdFromLocation() { + assertEquals("file://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("file://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + assertEquals("s3://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("s3://bucketname:10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + } } From 34286830d1cfa4849a82909eaff20528980fd717 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:53 -0400 Subject: [PATCH 070/546] get size for direct uploads --- .../impl/CreateNewDataFilesCommand.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index ac701da1be9..a8be1bd5116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,18 +3,20 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -83,7 +85,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; + try { + sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); + + // get file size + // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call + // to get the size immediately after uploading may fail. As of the addition of + // PR#9409 adding storage quotas, we are now requiring size to be available + // earlier. 
If this is seen, adding + // a delay/retry may help + newFileSize = sio.retrieveSizeFromMedia(); + } catch (IOException e) { + // If we don't get a file size, a CommandExecutionException will be thrown later in the code + e.printStackTrace(); + } + } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From 2adfa8af01124c31ada3f1801dd5f3dac0fd704e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:20:31 -0400 Subject: [PATCH 071/546] refactor, add delete method, etc. --- .../dataaccess/GlobusOverlayAccessIO.java | 157 ++++++++++++------ 1 file changed, 110 insertions(+), 47 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 965dc3c0947..011bb74f720 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -17,11 +17,14 @@ import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; import org.apache.http.util.EntityUtils; +import jakarta.json.Json; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; /** * @author qqmyers @@ -43,7 +46,6 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - String globusAccessToken = null; /* * If this is set to true, the store supports Globus transfer in and * Dataverse/the globus app manage file locations, access controls, deletion, @@ -51,35 +53,64 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce */ private boolean dataverseManaged = false; + private String relativeDirectoryPath; + + private String endpointPath; + + private String filename; + + private String endpoint; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - if (dvObject instanceof DataFile) { - globusAccessToken = retrieveGlobusAccessToken(); - } dataverseManaged = isDataverseManaged(this.driverId); + } + + private void parsePath() { + int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + endpointPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("endpointPath: " + endpointPath); + + if (dataverseManaged && (dvObject!=null)) { + + Dataset ds = null; + if (dvObject instanceof Dataset) { + ds = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + ds = ((DataFile) dvObject).getOwner(); + } + relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + } else { + relativeDirectoryPath = ""; + } + if (filenameStart > 0) { + relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); + } + logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + filename = path.substring(filenameStart); + endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; - logger.info("GAT3: " + globusAccessToken); + } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureStores(null, driverId, storageLocation); this.dataverseManaged = isDataverseManaged(this.driverId); if (dataverseManaged) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); path = parts[1]; } else { this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); logger.fine("Relative path: " + path); } -//ToDo - only when needed? - globusAccessToken = retrieveGlobusAccessToken(); - } - + private String retrieveGlobusAccessToken() { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); @@ -101,33 +132,16 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override - long retrieveSize() { + public long retrieveSizeFromMedia() { + parsePath(); + String globusAccessToken = retrieveGlobusAccessToken(); logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { - int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); - int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("directoryPath: " + directoryPath); - - if (dataverseManaged && (dvObject!=null)) { - Dataset ds = ((DataFile) dvObject).getOwner(); - directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); - logger.info("directoryPath now: " + directoryPath); - - } - if (filenameStart > 0) { - directoryPath = directoryPath + path.substring(0, filenameStart); - } - logger.info("directoryPath finally: " + directoryPath); - String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint - + "/ls?path=" + directoryPath + "&filter=name:" + filename); + + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); logger.info("Token is " + globusAccessToken); @@ -166,25 +180,63 @@ public InputStream getInputStream() throws IOException { @Override public void delete() throws IOException { -// Fix - // Delete is best-effort - we tell the remote server and it may or may not - // implement this call + parsePath(); + // Delete is best-effort - we tell the endpoint to delete don't monitor whether + // it succeeds if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } + String globusAccessToken = retrieveGlobusAccessToken(); + // Construct Globus URL + URI absoluteURI = null; try { - HttpDelete del = new HttpDelete(baseUrl + "/" + path); - CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); - try { - int code = response.getStatusLine().getStatusCode(); - switch (code) { + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); + HttpGet get = new HttpGet(absoluteURI); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + // Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + String submissionId = responseJson.getString("value"); + logger.info("submission_id for delete is: " + submissionId); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/delete"); + HttpPost post = new HttpPost(absoluteURI); + JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); + taskJsonBuilder.add("submission_id", submissionId).add("DATA_TYPE", "delete").add("endpoint", endpoint) + .add("DATA", Json.createArrayBuilder().add(Json.createObjectBuilder().add("DATA_TYPE", "delete_item").add("path", + endpointPath + relativeDirectoryPath + "/" + filename))); + post.setHeader("Content-Type", "application/json"); + post.addHeader("Authorization", "Bearer " + globusAccessToken); + String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); + logger.info("Sending: " + taskJson); + post.setEntity(new StringEntity(taskJson, "utf-8")); + CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); + int statusCode=postResponse.getStatusLine().getStatusCode(); + logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + switch (statusCode) { + case 202: + // ~Success - delete task was accepted + logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + break; case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + path); + // Duplicate - delete task was already accepted + logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + break; default: - logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + logger.warning("Response from " + post.getURI().toString() + " was " + + 
postResponse.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(postResponse.getEntity())); } - } finally { - EntityUtils.consume(response.getEntity()); + + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -250,6 +302,16 @@ static boolean isValidIdentifier(String driverId, String storageId) { return true; } + @Override + public String getStorageLocation() throws IOException { + parsePath(); + if (dataverseManaged) { + return this.driverId + DataAccess.SEPARATOR + relativeDirectoryPath + "/" + filename; + } else { + return super.getStorageLocation(); + } + } + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -272,7 +334,7 @@ public static void main(String[] args) { try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSize()); + logger.info("Size is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -286,7 +348,7 @@ public static void main(String[] args) { df.setOwner(ds); df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSize()); + logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -294,4 +356,5 @@ public static void main(String[] args) { } } + } From bdba5d8ef8a459314d5b8dccab30190461bbfdea Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 14:03:46 -0400 Subject: [PATCH 072/546] implement signedUrls for globus app, refactor --- .../edu/harvard/iq/dataverse/api/Admin.java | 8 +- .../harvard/iq/dataverse/api/Datasets.java | 75 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/Files.java | 6 +- .../externaltools/ExternalToolHandler.java | 71 +----------------- .../dataverse/globus/GlobusServiceBean.java | 58 +++++++------- .../iq/dataverse/util/URLTokenUtil.java | 65 ++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 4 +- .../ExternalToolHandlerTest.java | 11 +-- .../ExternalToolServiceBeanTest.java | 4 +- 9 files changed, 192 insertions(+), 110 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..1870c7cb508 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -96,7 +96,6 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; -import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; @@ -105,6 +104,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; @@ -2418,12 +2418,12 @@ public Response 
getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } String baseUrl = urlInfo.getString("url"); - int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10); - String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET"); + int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10); + String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET"); String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); - return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); + return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d3ea1b80696..aad5a95bd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -90,6 +90,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; @@ -3328,7 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" @POST @AuthRequired - @Path("{id}/addglobusFiles") + @Path("{id}/addGlobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @@ -3411,6 +3412,74 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer. This api endpoint is expected to be called as a signed callback + * after the globus-dataverse app/other app is launched, but it will accept + * other forms of authentication. 
+ * + * @param crc + * @param datasetId + */ + @GET + @AuthRequired + @Path("{id}/globusUploadParameters") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @QueryParam(value = "locale") String locale) + { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + if(!DataAccess.getDriverType(storeId).equals(DataAccess.GLOBUS)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + boolean managed = GlobusOverlayAccessIO.isDataverseManaged(storeId); + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}").add("managed", managed))); + + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing") + .add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files") + .add(URLTokenUtil.TIMEOUT, 300)); + + + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + return ok(tokenUtil.createPostBody(tokenUtil.getParams(queryParams.build()), allowedApiCalls.build())); + } + /** Requests permissions for a given globus user to upload to the dataset * * @param crc @@ -3915,8 +3984,8 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, } - ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } catch (WrappedResponse wr) { return wr.getResponse(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 82811162d52..4c2fa8f68ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -48,6 +48,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -822,10 +824,10 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P return error(BAD_REQUEST, "FileMetadata not found."); } - ExternalToolHandler eth = null; + URLTokenUtil eth = null; eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index de4317464e6..36227c2f883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -22,12 +22,8 @@ import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.ws.rs.HttpMethod; @@ -41,15 +37,10 @@ */ public class ExternalToolHandler extends URLTokenUtil { - private final ExternalTool externalTool; + public final ExternalTool externalTool; private String requestMethod; - - public static final String HTTP_METHOD="httpMethod"; - public static final String TIMEOUT="timeOut"; - public static final String SIGNED_URL="signedUrl"; - public static final String NAME="name"; - public static final String URL_TEMPLATE="urlTemplate"; + /** @@ -134,10 +125,10 @@ public String handleRequest(boolean preview) { } else { // ToDo - if the allowedApiCalls() are defined, could/should we send them to - // tools using GET as well? + // tools using POST as well? if (requestMethod.equals(HttpMethod.POST)) { - String body = JsonUtil.prettyPrint(createPostBody(params).build()); + String body = JsonUtil.prettyPrint(createPostBody(params, null).build()); try { logger.info("POST Body: " + body); return postFormData(body); @@ -149,60 +140,6 @@ public String handleRequest(boolean preview) { return null; } - public JsonObject getParams(JsonObject toolParameters) { - //ToDo - why an array of object each with a single key/value pair instead of one object? 
- JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); - - // ToDo return json and print later - JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); - if (!(queryParams == null) && !queryParams.isEmpty()) { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - JsonValue param = getParam(value); - if (param != null) { - paramsBuilder.add(key, param); - } - }); - }); - } - return paramsBuilder.build(); - } - - public JsonObjectBuilder createPostBody(JsonObject params) { - JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); - bodyBuilder.add("queryParameters", params); - String apiCallStr = externalTool.getAllowedApiCalls(); - if (apiCallStr != null && !apiCallStr.isBlank()) { - JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); - JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - logger.fine(JsonUtil.prettyPrint(apiObj)); - String name = apiObj.getJsonString(NAME).getString(); - String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); - int timeout = apiObj.getInt(TIMEOUT); - String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); - logger.fine("URL Template: " + urlTemplate); - urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = apiPath; - // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) - ApiToken apiToken = getApiToken(); - if (apiToken != null) { - url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), - httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") - + getApiToken().getTokenString()); - } - logger.fine("Signed URL: " + url); - apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) - .add(SIGNED_URL, url).add(TIMEOUT, timeout)); - })); - bodyBuilder.add("signedUrls", apisBuilder); - } - return bodyBuilder; - } - private String postFormData(String allowedApis) throws IOException, InterruptedException { String url = null; HttpClient client = HttpClient.newHttpClient(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8aa9915db58..2c0edd070f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -20,6 +20,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -45,6 +46,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.commons.codec.binary.StringUtils; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -58,6 +61,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import 
edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -120,7 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); - ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); @@ -282,7 +285,7 @@ private String getUniqueFilePath(GlobusEndpoint endpoint) { //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) - .removalListener((ruleId, datasetId, cause) -> { + .evictionListener((ruleId, datasetId, cause) -> { //Delete rules that expire Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, null); @@ -575,12 +578,23 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - //Use URLTokenUtil for params currently in common with external tools. + // Use URLTokenUtil for params currently in common with external tools. URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); String appUrl; if (upload) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + String callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusUploadParameters?locale=" + localeCode; + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), + HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } else { + // Shouldn't happen + logger.warning("unable to get api token for user: " + user.getIdentifier()); + } + appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); } else { if (df == null) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") @@ -637,39 +651,27 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S globusLogger = logger; } - globusLogger.info("Starting an globusUpload "); + Thread.sleep(5000); - - // ToDo - use DataAccess methods? 
- //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = null; - - Thread.sleep(5000); - // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); + globusLogger.info("Starting an globusUpload "); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - - ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - - if(ruleId!=null) { + String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + logger.info("Found rule: " + ruleId); + if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); - if(datasetId!=null) { - - //Will delete rule - rulesCache.invalidate(ruleId); - } else { - //The cache already expired this rule, in which case it's delay not long enough, or we have some other problem - logger.warning("Rule " + ruleId + " not found in rulesCache"); - deletePermission(ruleId, dataset, globusLogger); + if (datasetId != null) { + + // Will delete rule + rulesCache.invalidate(ruleId); } } @@ -836,6 +838,10 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } + if (ruleId != null) { + deletePermission(ruleId, dataset, globusLogger); + globusLogger.info("Removed upload permission: " + ruleId); + } } public String addFilesAsync(String curlCommand, Logger globusLogger) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 4ae76a7b8db..216237105aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -6,6 +6,10 @@ import java.util.regex.Pattern; import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import edu.harvard.iq.dataverse.DataFile; @@ -13,6 +17,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonUtil; public class URLTokenUtil { @@ -22,6 +28,13 @@ public class URLTokenUtil { protected final FileMetadata fileMetadata; protected ApiToken apiToken; protected String localeCode; + + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; /** * File level @@ -193,6 +206,58 @@ private String getTokenValue(String value) { throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + public JsonObjectBuilder createPostBody(JsonObject params, JsonArray allowedApiCalls) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + if (allowedApiCalls != null && !allowedApiCalls.isEmpty()) { + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + allowedApiCalls.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + 
logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of object each with a single key/value pair instead of one object? + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + public static String getScriptForUrl(String url) { String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3497b23eb94..88f819b417b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2645,8 +2645,8 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status -datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. -datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this dataset. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this dataset. 
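For reference, the post body assembled by the getParams/createPostBody methods added to URLTokenUtil above (and delivered to the external tool or Globus app) has roughly the following shape. The dataset values, the subset of query parameters, and the signature portion of each signedUrl are illustrative placeholders only; the actual contents depend on the tool manifest and the allowedApiCalls array passed in:

    {
      "queryParameters": {
        "datasetId": 42,
        "datasetPid": "doi:10.5072/FK2/ABCDEF",
        "siteUrl": "https://demo.dataverse.org",
        "datasetVersion": ":draft",
        "dvLocale": "en"
      },
      "signedUrls": [
        {
          "name": "addGlobusFiles",
          "httpMethod": "POST",
          "signedUrl": "https://demo.dataverse.org/api/v1/datasets/42/addGlobusFiles?until=...&user=...&method=POST&token=...",
          "timeOut": 300
        }
      ]
    }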
diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 21bb6633204..6f0132e2bc9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; @@ -53,7 +54,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { Exception expectedException1 = null; String nullLocaleCode = null; try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -71,7 +72,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { DataFile dataFile = new DataFile(); dataFile.setId(42l); try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -92,7 +93,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { .build().toString()); Exception expectedException2 = null; try { - ExternalToolHandler externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException2 = ex; } @@ -225,10 +226,10 @@ public void testGetToolUrlWithAllowedApiCalls() { assertTrue(et != null); System.out.println("allowedApiCalls et created"); System.out.println(et.getAllowedApiCalls()); - ExternalToolHandler externalToolHandler = new ExternalToolHandler(et, ds, at, null); + URLTokenUtil externalToolHandler = new ExternalToolHandler(et, ds, at, null); System.out.println("allowedApiCalls eth created"); JsonObject jo = externalToolHandler - .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters()))).build(); + .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters())), JsonUtil.getJsonArray(et.getAllowedApiCalls())).build(); assertEquals(1, jo.getJsonObject("queryParameters").getInt("datasetId")); String signedUrl = jo.getJsonArray("signedUrls").getJsonObject(0).getString("signedUrl"); // The date and token will change each time but check for the constant parts of diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 9337949f605..4f5af8b97b0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -9,6 +9,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.util.ArrayList; import java.util.List; import jakarta.json.Json; @@ -49,7 +51,7 @@ public void testfindAll() { externalToolTypes.add(externalToolType); ExternalTool.Scope scope = ExternalTool.Scope.FILE; ExternalTool externalTool = new ExternalTool("displayName", "toolName", "description", externalToolTypes, scope, "http://foo.com", "{}", DataFileServiceBean.MIME_TYPE_TSV_ALT); - ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); + URLTokenUtil externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); From f056d6c051bf784ca4808e8757efa9afcaf7778c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 19 Oct 2023 15:10:14 -0400 Subject: [PATCH 073/546] minor incremental changes (#9635) --- .../search/SearchIncludeFragment.java | 30 +++++++++++++++---- .../dataverse/search/SearchServiceBean.java | 6 ++-- .../dataverse/search/SolrQueryResponse.java | 10 ++++++- src/main/webapp/search-include-fragment.xhtml | 24 +++++++++++++-- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 47a5621c3d6..14274a09399 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -131,7 +131,8 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; - private boolean solrIsOverloaded = false; + private boolean solrIsTemporarilyUnavailable = false; + private boolean solrFacetsDisabled = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -361,6 +362,14 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); + } + // Solr "temporarily unavailable" is the condition triggered by + // receiving a 503 from the search engine, that is in turn a result + // of one of the Solr "circuit breakers" being triggered by excessive + // load. We treat this condition as distinct from "Solr is down", + // on the assumption that it is transitive. + if (solrQueryResponse.isSolrTemporarilyUnavailable()) { + setSolrTemporarilyUnavailable(true); } // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) 
@@ -386,7 +395,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - if (selectedTypesList.size() < 3) { + if (selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable()) { // If some types are NOT currently selected, we will need to // run another query to obtain the numbers of the unselected types: @@ -1079,14 +1088,23 @@ public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } - public boolean isSolrOverloaded() { - return solrIsOverloaded; + public boolean isSolrTemporarilyUnavailable() { + return solrIsTemporarilyUnavailable; } - public void setSolrIsOverloaded(boolean solrIsOverloaded) { - this.solrIsOverloaded = solrIsOverloaded; + public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) { + this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; } + public boolean isFacetsDisabled() { + return solrFacetsDisabled; + } + + public void setFacetsDisabled(boolean solrFacetsDisabled) { + this.solrFacetsDisabled = solrFacetsDisabled; + } + + public boolean isRootDv() { return rootDv; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 1b92c2a4a46..6e410488794 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,7 +382,6 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; - boolean solrTemporarilyUnavailable = false; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); @@ -397,6 +396,8 @@ public SolrQueryResponse search( logger.info("return code: "+queryResponse.getStatus()); } + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); + // We probably shouldn't be assuming that this is necessarily a // "search syntax error", as the code below implies - could be // something else too - ? 
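For context on the 503 handling in the hunk that follows: when one of Solr's circuit breakers (memory, CPU load, etc.) rejects a query, the client receives an HTTP 503 whose JSON body is roughly shaped like the sketch below (the header fields and message text are illustrative and vary by Solr version). It is that 503 status, surfaced via ex.code(), that is translated into the "temporarily unavailable" state rather than "Solr is down":

    {
      "responseHeader": { "status": 503, "QTime": 2 },
      "error": {
        "msg": "Circuit Breakers tripped ...",
        "code": 503
      }
    }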
@@ -407,9 +408,9 @@ public SolrQueryResponse search( // a transient condition): if (ex.code() == 503) { - solrTemporarilyUnavailable = true; // actual logic for communicating this state back to the local // client code TBD (@todo) + exceptionSolrQueryResponse.setSolrTemporarilyUnavailable(true); } String error = "Search Syntax Error: "; @@ -421,7 +422,6 @@ public SolrQueryResponse search( error += messageFromSolr; } logger.info(error); - SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java index 893099ff08d..27e79cb1fc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java @@ -26,6 +26,7 @@ public class SolrQueryResponse { private String error; private Map dvObjectCounts = new HashMap<>(); private Map publicationStatusCounts = new HashMap<>(); + private boolean solrTemporarilyUnavailable = false; public static String DATAVERSES_COUNT_KEY = "dataverses_count"; public static String DATASETS_COUNT_KEY = "datasets_count"; @@ -91,7 +92,14 @@ public JsonObjectBuilder getPublicationStatusCountsAsJSON(){ } return this.getMapCountsAsJSON(publicationStatusCounts); } - + + public boolean isSolrTemporarilyUnavailable() { + return solrTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrTemporarilyUnavailable) { + this.solrTemporarilyUnavailable = solrTemporarilyUnavailable; + } public JsonObjectBuilder getDvObjectCountsAsJSON(){ diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 718df813348..8397a14136e 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -88,12 +88,24 @@
#{msg.rendered()} From 00a17071c358b7ebee09e77130cb7319c665dfb5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:36 -0500 Subject: [PATCH 151/546] Revert "allow longer custom questions" This reverts commit ba4d178f5c541ec88ea0879ec5c715bda529f2c9. --- src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java index d880da5b4a8..2cb6f27c3e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java @@ -2,7 +2,7 @@ import java.io.Serializable; import java.util.List; import jakarta.persistence.*; -import jakarta.validation.constraints.NotBlank; +import org.hibernate.validator.constraints.NotBlank; /** * @@ -41,7 +41,7 @@ public void setId(Long id) { private String questionType; @NotBlank(message = "{custom.questiontext}") - @Column( nullable = false, columnDefinition = "TEXT") + @Column( nullable = false ) private String questionString; private boolean required; From d3fbee58262ac439a0b10f4ca7e1494dea4a6c5d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:43 -0500 Subject: [PATCH 152/546] Revert "add return null if commandexception" This reverts commit aa7eceeb762eca045127cf91acb35d6c62b00d79. --- src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 8b09291d052..9fb584a9133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -320,7 +320,7 @@ public String save() { logger.info("Guestbook Page Command Exception. Dataverse: " + dataverse.getName()); logger.info(ex.toString()); FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("guestbook.save.fail"), " - " + ex.toString())); - return null; + //logger.severe(ex.getMessage()); } editMode = null; String msg = (create)? BundleUtil.getStringFromBundle("guestbook.create"): BundleUtil.getStringFromBundle("guestbook.save"); From 4b347c7ec13591ba38ffa55fbde394cce2b8bcfe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:47:17 -0500 Subject: [PATCH 153/546] doc update --- .../source/developers/big-data-support.rst | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 1917967b3f3..d38f7f27a68 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,20 +149,30 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. 
+Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. -As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). +Globus endpoints can be in a variety of places, from data centers to personal computers. +This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). -Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation -Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost). +Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. +Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus +S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. +Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). -The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. +For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. +Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. 
be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). + +All of Dataverse's Globus capabilities are now store-based (see the store documentation) and therefore different collections/datasets can be configured to use different Globus-capable stores (or normal file, S3 stores, etc.) + +More details of the setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app). From 6ad55eb689071921857a9f97135e97dd2e71c076 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:50:16 -0500 Subject: [PATCH 154/546] Support multiple ref endpoints for non-managed case --- .../harvard/iq/dataverse/api/Datasets.java | 72 ++++---- .../dataaccess/GlobusAccessibleStore.java | 14 +- .../dataaccess/GlobusOverlayAccessIO.java | 166 +++++++++++++----- .../dataaccess/RemoteOverlayAccessIO.java | 47 +++-- .../dataverse/globus/GlobusServiceBean.java | 31 +++- 5 files changed, 226 insertions(+), 104 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b1c528f3fd9..a57f373f106 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3601,13 +3601,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); - if (managed) { - - allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths") + String requestCallName = managed ? 
"requestGlobusTransferPaths" : "requestGlobusReferencePaths"; + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName) .add(URLTokenUtil.HTTP_METHOD, "POST") - .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusPaths") .add(URLTokenUtil.TIMEOUT, 300)); - } allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") .add(URLTokenUtil.HTTP_METHOD, "POST") .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") @@ -3632,7 +3630,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat */ @POST @AuthRequired - @Path("{id}/requestGlobusTransferPaths") + @Path("{id}/requestGlobusPaths") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody @@ -3666,35 +3664,45 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } catch (WrappedResponse wr) { return wr.getResponse(); } - - if(!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { - return badRequest("This dataset does not have managed Globus storage"); - } - if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - try { + JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - int numberOfPaths = params.getInt("numberOfFiles"); - if(numberOfPaths <=0) { - return badRequest("numberOfFiles must be positive"); - } - - JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); - switch (response.getInt("status")) { - case 201: - return ok(response.getJsonObject("paths")); - case 400: - return badRequest("Unable to grant permission"); - case 409: - return conflict("Permission already exists"); - default: - return error(null, "Unexpected error when granting permission"); - } - } catch (NullPointerException|ClassCastException e) { - return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); - + if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + try { + JsonArray referencedFiles = params.getJsonArray("referencedFiles"); + if (referencedFiles == null || referencedFiles.size() == 0) { + return badRequest("No referencedFiles specified"); + } + JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles); + return (ok(fileMap)); + } catch (Exception e) { + return badRequest(e.getLocalizedMessage()); + } + } else { + try { + String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if (numberOfPaths <= 0) { + return badRequest("numberOfFiles must be positive"); + } + + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { + case 201: + return ok(response.getJsonObject("paths")); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } catch (NullPointerException | ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } } else { return 
forbidden("User doesn't have permission to upload to this dataset"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index 1d98044b2b5..afc7556481a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -7,8 +7,7 @@ public interface GlobusAccessibleStore { static final String MANAGED = "managed"; - static final String GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH = "globus-transfer-endpoint-with-basepath"; - static final String GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS = "globus-reference-endpoints-with-basepaths"; + static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; public static boolean isDataverseManaged(String driverId) { @@ -16,37 +15,36 @@ public static boolean isDataverseManaged(String driverId) { } public static String getTransferEndpointId(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; } public static String getTransferPath(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : ""; } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i/// * - * baseUrl: globus:// + * transfer and reference endpoint formats: + * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -50,7 +53,7 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce * Dataverse/the globus app manage file locations, access controls, deletion, * etc. 
*/ - private boolean dataverseManaged = false; + private Boolean dataverseManaged = null; private String relativeDirectoryPath; @@ -58,22 +61,59 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private String filename; + private String[] allowedEndpoints; private String endpoint; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + this.driverId = driverId; + configureStores(null, driverId, storageLocation); + if (isManaged()) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Referenced path: " + path); + } + } + private boolean isManaged() { + if(dataverseManaged==null) { + dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); + } + return dataverseManaged; + } + + private String retrieveGlobusAccessToken() { + String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN); + + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); + } + + private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf(DataAccess.SEPARATOR) + 3); + String endpointWithBasePath = null; + if (!isManaged()) { + endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints); + } else { + endpointWithBasePath = allowedEndpoints[0]; + } + //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("endpointPath: " + endpointPath); + - if (dataverseManaged && (dvObject!=null)) { + if (isManaged() && (dvObject!=null)) { Dataset ds = null; if (dvObject instanceof Dataset) { @@ -95,40 +135,36 @@ private void parsePath() { } - public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - this.driverId = driverId; - configureStores(null, driverId, storageLocation); - this.dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); - if (dataverseManaged) { - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); - path = parts[1]; - } else { - this.setIsLocalFile(false); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + private static String findMatchingEndpoint(String path, String[] allowedEndpoints) { + for(int i=0;i 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index aafab038ae2..5463254140d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,10 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; + // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. + static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; @@ -74,7 +77,7 @@ public class RemoteOverlayAccessIO extends StorageIO { protected StorageIO baseStore = null; protected String path = null; - protected String baseUrl = null; + private String baseUrl = null; protected static HttpClientContext localContext = HttpClientContext.create(); protected PoolingHttpClientConnectionManager cm = null; @@ -110,7 +113,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Relative path: " + path); } - private void validatePath(String relPath) throws IOException { + protected void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { @@ -457,19 +460,8 @@ int getUrlExpirationMinutes() { } protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = getConfigParam(BASE_URL); - if (baseUrl == null) { - throw new IOException("dataverse.files." 
+ this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - - } + configureEndpoints(); + if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); @@ -543,6 +535,31 @@ protected void configureStores(DataAccessRequest req, String driverId, String st } } + /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but + * the derived GlobusOverlayAccessIO can support multiple endpoints. + * @throws IOException + */ + protected void configureEndpoints() throws IOException { + baseUrl = getConfigParam(BASE_URL); + if (baseUrl == null) { + //Will accept the first endpoint using the newer setting + baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0]; + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } + } + if (baseUrl != null) { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + } + // Convenience method to assemble the path, starting with the DOI // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index dab0e36852c..3dee3bd498f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -6,7 +6,6 @@ import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; - import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -20,6 +19,8 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonString; +import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; @@ -57,7 +58,6 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -284,6 +284,33 @@ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int return response.build(); } + public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); + + JsonObjectBuilder fileMap = Json.createObjectBuilder(); + referencedFiles.forEach(value -> { + if (value.getValueType() != ValueType.STRING) { + throw new JsonParsingException("ReferencedFiles must be strings", 
null); + } + String referencedFile = ((JsonString) value).getString(); + boolean valid = false; + for (int i = 0; i < endpoints.size(); i++) { + if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) { + valid = true; + } + } + if (!valid) { + throw new IllegalArgumentException( + "Referenced file " + referencedFile + " is not in an allowed endpoint/path"); + } + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + fileMap.add(referencedFile, + storageIdentifier + "//" + referencedFile); + }); + return fileMap.build(); + } + //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From 48f02dde7f22b21e28c8d635df904b79532f042a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:56:56 -0500 Subject: [PATCH 155/546] handle file not found case --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0dec7133fb5..f42f5443108 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -25,6 +25,7 @@ import org.apache.http.util.EntityUtils; import jakarta.json.Json; +import jakarta.json.JsonArray; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; @@ -189,7 +190,11 @@ public long retrieveSizeFromMedia() { String responseString = EntityUtils.toString(response.getEntity()); logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + JsonArray dataArray = responseJson.getJsonArray("DATA"); + if (dataArray != null && dataArray.size() != 0) { + //File found + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); From c33f07aad938f4707e6985ddeeec801969e4a3fc Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Sat, 11 Nov 2023 14:38:00 -0500 Subject: [PATCH 156/546] Add logic to leave settings as found before test --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 3bfa3d72fbd..6b9b59f431d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -30,7 +30,12 @@ public static void setUpClass() { @Test public void testFreeformDraftActions() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -85,15 +90,20 @@ public void 
testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + } @Test public void testAddProvFile() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -213,12 +223,8 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); -// Command removed, redundant -// Response deleteProvFreeForm = UtilIT.deleteProvFreeForm(dataFileId.toString(), apiTokenForDepositor); -// deleteProvFreeForm.prettyPrint(); -// deleteProvFreeForm.then().assertThat() -// .statusCode(OK.getStatusCode()); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } } From 6beafcef4855c2a35cfe6d61408a5625a285885e Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Sat, 11 Nov 2023 22:09:22 -0500 Subject: [PATCH 157/546] Change format to MD of the QA guide --- doc/sphinx-guides/source/index.rst | 2 +- doc/sphinx-guides/source/qa/conclusion.md | 11 -------- doc/sphinx-guides/source/qa/index.md | 10 +++++++ doc/sphinx-guides/source/qa/index.rst | 14 ---------- .../{manual-testing.rst => manual-testing.md} | 27 +++++++++---------- ...her-approaches.rst => other-approaches.md} | 24 ++++++++--------- .../source/qa/{overview.rst => overview.md} | 23 ++++++++-------- ...ormance-tests.rst => performance-tests.md} | 21 ++++++++------- ...ion.rst => test-automation-integration.md} | 24 ++++++++--------- ...tructure.rst => testing-infrastructure.md} | 15 +++++------ 10 files changed, 77 insertions(+), 94 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/conclusion.md create mode 100644 doc/sphinx-guides/source/qa/index.md delete mode 100755 doc/sphinx-guides/source/qa/index.rst rename doc/sphinx-guides/source/qa/{manual-testing.rst => manual-testing.md} (92%) rename doc/sphinx-guides/source/qa/{other-approaches.rst => other-approaches.md} (95%) rename doc/sphinx-guides/source/qa/{overview.rst => overview.md} (95%) rename doc/sphinx-guides/source/qa/{performance-tests.rst => performance-tests.md} (91%) rename doc/sphinx-guides/source/qa/{test-automation-integration.rst => test-automation-integration.md} (78%) rename doc/sphinx-guides/source/qa/{testing-infrastructure.rst => testing-infrastructure.md} (82%) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index 9d3d49ef4f2..3184160b387 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -20,7 +20,7 @@ These documentation guides are for the |version| version of Dataverse. 
To find g developers/index container/index style/index - qa/index + qa/index.md How the Guides Are Organized ---------------------------- diff --git a/doc/sphinx-guides/source/qa/conclusion.md b/doc/sphinx-guides/source/qa/conclusion.md deleted file mode 100644 index 233dc3cdf3d..00000000000 --- a/doc/sphinx-guides/source/qa/conclusion.md +++ /dev/null @@ -1,11 +0,0 @@ -Conclusion -========== - -QA is awesome. Do you know what else is awesome? Markdown. - -It's easy to create a [link](https://dataverse.org), for example, and nested bullets don't need extra indentation: - -- foo - - one - - two -- bar diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md new file mode 100644 index 00000000000..c190d823bef --- /dev/null +++ b/doc/sphinx-guides/source/qa/index.md @@ -0,0 +1,10 @@ +# QA Guide + +```{toctree} +overview.md +testing-infrastructure.md +performance-tests.md +manual-testing.md +test-automation-integration.md +other-approaches.md +``` \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/index.rst b/doc/sphinx-guides/source/qa/index.rst deleted file mode 100755 index dd8c046fddc..00000000000 --- a/doc/sphinx-guides/source/qa/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -QA Guide -======== - -**Contents:** - -.. toctree:: - - overview - testing-infrastructure - performance-tests - manual-testing - test-automation-integration - other-approaches - conclusion diff --git a/doc/sphinx-guides/source/qa/manual-testing.rst b/doc/sphinx-guides/source/qa/manual-testing.md similarity index 92% rename from doc/sphinx-guides/source/qa/manual-testing.rst rename to doc/sphinx-guides/source/qa/manual-testing.md index 8e50e6b6b08..bf6f16f7911 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.rst +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -1,23 +1,22 @@ -Manual Testing Approach -======================= +# Manual Testing Approach -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Introduction -Introduction ------------- We use a risk-based, manual testing approach to achieve the most benefit with limited resources. This means we want to catch bugs where they are likely to exist, ensure core functions work, and failures do not have catastrophic results. In practice this means we do a brief positive check of core functions on each build called a smoke test, we test the most likely place for new bugs to exist, the area where things have changed, and attempt to prevent catastrophic failure by asking about the scope and reach of the code and how failures may occur. If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. -Examining a Pull Pequest for Test Cases: ----------------------------------------- -What Problem Does it Solve? -++++++++++++++++++++++++++++++++++++++++++++ +## Examining a Pull Pequest for Test Cases: + +### What Problem Does it Solve? + Read the top part of the pull request for a description, notes for reviewers, and usually a how-to test section. Does it make sense? If not, read the underlying ticket it closes, and any release notes or documentation. 
Knowing in general what it does helps you to think about how to approach it. -How is it Configured? -+++++++++++++++++++++ +### How is it Configured? + Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. An admin will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under how to test. Those may be sufficient, but you should always think about it based on what it does. @@ -32,8 +31,8 @@ Check permissions. Is this feature limited to a specific set of users? Can it be Think about risk. Is the feature or function part of a critical area such as permissions? Does the functionality modify data? You may do more testing when the risk is higher. -Smoke Test ------------ +## Smoke Test + 1. Go to the homepage on https://dataverse-internal.iq.harvard.edu. Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. 2. Create a new user: I use a formulaic name with my initials and date and make the username and password the same, eg. kc080622. diff --git a/doc/sphinx-guides/source/qa/other-approaches.rst b/doc/sphinx-guides/source/qa/other-approaches.md similarity index 95% rename from doc/sphinx-guides/source/qa/other-approaches.rst rename to doc/sphinx-guides/source/qa/other-approaches.md index bd92e7d22d8..b50d9d0cf11 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.rst +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,13 +1,13 @@ -Other approaches to deploying and testing -========================================= +# Other approaches to deploying and testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. I’m assuming if a developer is testing, they would likely just deploy to their dev environment. That might be ok but not sure the env is fully configured enough to offer a real-world testing scenario. An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. -Tips and tricks ---------------- +## Tips and tricks + - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. 
Save the doc in a testing folder (I have one on Google Drive). This potentially will help with future testing. - When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. @@ -17,8 +17,8 @@ Tips and tricks - When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. - Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. -Workflow for Completing QA on a PR ------------------------------------ +## Workflow for Completing QA on a PR + 1. Assign the PR you are working on to yourself. @@ -106,8 +106,8 @@ Workflow for Completing QA on a PR Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. -Checklist for Completing QA on a PR ------------------------------------- +## Checklist for Completing QA on a PR + 1. Build the docs 2. Smoke test the pr @@ -115,8 +115,8 @@ Checklist for Completing QA on a PR 4. Regression test 5. Test any upgrade instructions -Checklist for QA on Release ---------------------------- +## Checklist for QA on Release + 1. Review Consolidated Release Notes, in particular upgrade instructions. 2. Conduct performance testing and compare with the previous release. diff --git a/doc/sphinx-guides/source/qa/overview.rst b/doc/sphinx-guides/source/qa/overview.md similarity index 95% rename from doc/sphinx-guides/source/qa/overview.rst rename to doc/sphinx-guides/source/qa/overview.md index 153fab1a28f..51b38ee0921 100644 --- a/doc/sphinx-guides/source/qa/overview.rst +++ b/doc/sphinx-guides/source/qa/overview.md @@ -1,26 +1,25 @@ -Overview -======== +# Overview -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Introduction -Introduction ------------- This document describes the testing process used by QA at IQSS and provides a guide for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. -Workflow --------- +## Workflow + The basic workflow is bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub Kanban board. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common develop branch and ultimately released as part of the product. Before a pull request is merged it must be reviewed by a member of the development team from a coding perspective, it must pass automated integration tests before moving to QA. There it is tested manually, exercising the UI using three common browser types and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions are tested. Once this passes and any bugs that are found are corrected, the automated integration tests are confirmed to be passing, the PR is merged into development, the PR is closed, and the branch is deleted. At this point, the pr moves from the QA column automatically into the Done column and the process repeats with the next pr until it is decided to make a release. -Release Cadence and Sprints ---------------------------- +## Release Cadence and Sprints + A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. -Performance Testing and Deployment ----------------------------------- +## Performance Testing and Deployment + The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named Locust, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds (I believe), it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. Once the performance has been tested and recorded in a Google spreadsheet for this proposed version, the release will be prepared and posted. diff --git a/doc/sphinx-guides/source/qa/performance-tests.rst b/doc/sphinx-guides/source/qa/performance-tests.md similarity index 91% rename from doc/sphinx-guides/source/qa/performance-tests.rst rename to doc/sphinx-guides/source/qa/performance-tests.md index 1bfde798100..7075d7f1776 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.rst +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -1,21 +1,22 @@ -Performance Testing -=================== +# Performance Testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` + +## Introduction -Introduction ------------- To run performance tests, we have a performance test cluster on AWS that employs web, database, and Solr. The database contains a copy of production that is updated weekly on Sundays. 
To ensure the homepage content is consistent between test runs across releases, two scripts set the datasets that will appear on the homepage. There is a script on the web server in the default CentOS user dir and one on the database server in the default CentOS user dir. Run these scripts before conducting the tests. -Access ------- +## Access + Access to performance cluster instances requires ssh keys, see Leonid. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. -Special Notes âš ï¸ ------------------ +## Special Notes âš ï¸ + Please note the performance database is also used occasionally by Julian and the Curation team to generate prod reports so a courtesy check with Julian would be good before taking over the env. + Executing the Performance Script -------------------------------- To execute the performance test script, you need to install a local copy of the database-helper-scripts project (https://github.com/IQSS/dataverse-helper-scripts), written by Raman. I have since produced a stripped-down script that calls just the DB and ds and works with python3. diff --git a/doc/sphinx-guides/source/qa/test-automation-integration.rst b/doc/sphinx-guides/source/qa/test-automation-integration.md similarity index 78% rename from doc/sphinx-guides/source/qa/test-automation-integration.rst rename to doc/sphinx-guides/source/qa/test-automation-integration.md index 13c48105f91..5e9d00cd461 100644 --- a/doc/sphinx-guides/source/qa/test-automation-integration.rst +++ b/doc/sphinx-guides/source/qa/test-automation-integration.md @@ -1,15 +1,15 @@ -Test automation and integration test -==================================== +# Test automation and integration test -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` This test suite is added to and maintained by development. It is generally advisable for code contributors to add integration tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base’s code paths as possible, every time to catch bugs. This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. -Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: -------------------------------------------------------------------------- +## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: + 1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. @@ -17,12 +17,12 @@ Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: 3. Log on to jenkins.dataverse.org, select the IQSS_Dataverse_Internal project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: - - 8372-gdcc-xoai-library has IQSS implied - | **Repository URL:** https://github.com/IQSS/dataverse.git - | **Branch specifier:** \*/8372-gdcc-xoai-library - - GlobalDataverseCommunityConsortium:GDCC/DC-3B - | **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - | **Branch specifier:** \*/GDCC/DC-3B. 
+ * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. 4. Click Build Now and note the build number in progress. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.rst b/doc/sphinx-guides/source/qa/testing-infrastructure.md similarity index 82% rename from doc/sphinx-guides/source/qa/testing-infrastructure.rst rename to doc/sphinx-guides/source/qa/testing-infrastructure.md index d35bc6e9a23..fb66bc4d099 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.rst +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -1,16 +1,15 @@ -Infrastructure for Testing -========================== +# Infrastructure for Testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Dataverse Internal -Dataverse Internal -------------------- To build and test a PR, we use a build named IQSS_Dataverse_Internal on jenkins.dataverse.org, which deploys the .war file to an AWS instance named dataverse-internal.iq.harvard.edu. Login to Jenkins requires a username and password. Check with Don Sizemore. Login to the dataverse-internal server requires a key, see Leonid. -Guides Server -------------- +## Guides Server + There is also a guides build project named guides.dataverse.org. Any test builds of guides are deployed to a named directory** on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. Login to the guides server requires a key, see Don Sizemore. 
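
The load profile that the QA guide above describes for performance testing (roughly half homepage requests and half dataset-page requests, issued as a guest with 5–20 seconds of think time, ramping from 1 toward 250 users) can be sketched as a small Locust file. The snippet below is only an illustration of that profile, not the script the QA team actually runs; the persistent identifiers and host name are placeholders, and the real datasets used are the ~10 production datasets with varying file counts mentioned in the guide.

```python
# Illustrative sketch of the QA performance-test profile described above:
# ~50% homepage hits, ~50% dataset-page hits, 5-20 s of think time, guest user.
# The persistent IDs below are placeholders, not real datasets.
import random
from locust import HttpUser, task, between

# Hypothetical stand-ins for the ~10 datasets of varying file counts.
DATASET_PIDS = [
    "doi:10.5072/FK2/EXAMPLE1",
    "doi:10.5072/FK2/EXAMPLE2",
]

class GuestUser(HttpUser):
    # Simulated think time between requests, per the guide's description.
    wait_time = between(5, 20)

    @task
    def homepage(self):
        self.client.get("/")

    @task
    def dataset_page(self):
        # Equal task weights give the roughly 50/50 homepage/dataset split.
        pid = random.choice(DATASET_PIDS)
        self.client.get(f"/dataset.xhtml?persistentId={pid}", name="/dataset.xhtml")
```

It could be run against the performance cluster's web host with something like `locust -f perf_profile.py --host https://<perf-web-host>`, increasing the simulated user count in steps as the guide describes; results would then be compared against the numbers recorded for the previous release rather than against production directly.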
From 3407fb9f813984c857ef7708af7d6dc239b8f8ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 07:04:15 -0500 Subject: [PATCH 158/546] Add ProvIT to integration-tests.txt --- tests/integration-tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 18911b3164a..bb3bc7f9ce6 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,ProvIT From 2842cdaf246c531b04449ac4c8b20fc4a09c2668 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 Nov 2023 08:42:31 -0500 Subject: [PATCH 159/546] Move this change into BeforeAll/AfterAll --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 6b9b59f431d..69a87869fe1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -11,6 +11,9 @@ import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.AfterAll; + import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; @@ -20,22 +23,24 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; public class ProvIT { + + private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + + provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } @Test public void testFreeformDraftActions() { - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -90,20 +95,11 @@ public void testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - if(!provEnabled){ - 
UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - + } @Test - public void testAddProvFile() { - - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -223,6 +219,11 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset + + } + + @AfterAll + public static void tearDownClass() { if(!provEnabled){ UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } From 437e7ccd480dbae405238faffb9fff8a8317218d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 13 Nov 2023 09:56:16 -0500 Subject: [PATCH 160/546] #9464 remove unused import --- src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index fabb33e328a..557b7df202b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -128,7 +128,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; From d029cacc9aae5e361869b73f7e76661c5ab8d549 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:35:28 -0500 Subject: [PATCH 161/546] remove extra whitespace #10112 --- src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 69a87869fe1..a944c6aa926 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -27,12 +27,12 @@ public class ProvIT { private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ + if (!provEnabled) { UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } } @@ -99,7 +99,7 @@ public void testFreeformDraftActions() { } @Test - public void testAddProvFile() { + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); From c09034d638147c5cd618e5ff4a460e1840b8cd0a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:37:16 -0500 Subject: [PATCH 162/546] organize imports #10112 --- .../java/edu/harvard/iq/dataverse/api/ProvIT.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index a944c6aa926..33323ff4239 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -1,27 +1,23 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.AfterAll; - +import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; - +import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - public class ProvIT { private static boolean provEnabled = false; From a3d323599be4bcc6ad688a8b99135bd4447fbb02 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 16:07:53 -0500 Subject: [PATCH 163/546] various improvements to the QA Guide #10101 --- doc/sphinx-guides/source/developers/intro.rst | 2 + .../source/developers/testing.rst | 4 + .../source/developers/version-control.rst | 2 + doc/sphinx-guides/source/qa/index.md | 4 +- doc/sphinx-guides/source/qa/manual-testing.md | 31 +++---- .../source/qa/other-approaches.md | 91 +++++++++---------- doc/sphinx-guides/source/qa/overview.md | 15 ++- .../source/qa/performance-tests.md | 6 +- .../source/qa/test-automation-integration.md | 35 ------- .../source/qa/test-automation.md | 35 +++++++ .../source/qa/testing-infrastructure.md | 12 ++- 11 files changed, 119 insertions(+), 118 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/test-automation-integration.md create mode 100644 doc/sphinx-guides/source/qa/test-automation.md diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index a01a8066897..3eddfbe8d2d 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -37,6 +37,8 @@ Roadmap For the Dataverse Software development roadmap, please see https://www.iq.harvard.edu/roadmap-dataverse-project +.. _kanban-board: + Kanban Board ------------ diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index abecaa09fad..57733f25406 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -426,6 +426,10 @@ target/coverage-it/index.html is the place to start reading the code coverage re Load/Performance Testing ------------------------ +See also :doc:`/qa/performance-tests` in the QA Guide. + +.. _locust: + Locust ~~~~~~ diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 31fc0a4e602..f46411ebd7f 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -34,6 +34,8 @@ The "master" Branch The "`master `_" branch represents released versions of the Dataverse Software. 
As mentioned in the :doc:`making-releases` section, at release time we update the master branch to include all the code for that release. Commits are never made directly to master. Rather, master is updated only when we merge code into it from the "develop" branch. +.. _develop-branch: + The "develop" Branch ******************** diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index c190d823bef..08deb7ee27d 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -5,6 +5,6 @@ overview.md testing-infrastructure.md performance-tests.md manual-testing.md -test-automation-integration.md +test-automation.md other-approaches.md -``` \ No newline at end of file +``` diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/manual-testing.md index bf6f16f7911..9f365aae59f 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -9,23 +9,23 @@ We use a risk-based, manual testing approach to achieve the most benefit with li If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. -## Examining a Pull Pequest for Test Cases: +## Examining a Pull Request for Test Cases -### What Problem Does it Solve? +### What Problem Does It Solve? -Read the top part of the pull request for a description, notes for reviewers, and usually a how-to test section. Does it make sense? If not, read the underlying ticket it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. +Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. -### How is it Configured? +### How is It Configured? -Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. An admin will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! +Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! -Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under how to test. 
Those may be sufficient, but you should always think about it based on what it does. +Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature but a good basic approach to keep in mind. -Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log +Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log. Err on the side of providing the developer too much of server.log rather than too little. -Exercise the UI if there is one. I tend to use Chrome for most of my basic testing as it’s used twice as much as the next most commonly used browser, according to our site’s Google Analytics. I first go through all the options in the UI. Then, if all works, I’ll spot-check using Firefox and Safari. +Exercise the UI if there is one. We tend to use Chrome for most of my basic testing as it's used twice as much as the next most commonly used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. Check permissions. Is this feature limited to a specific set of users? Can it be accessed by a guest or by a non-privileged user? How about pasting a privileged page URL into a non-privileged user’s browser? @@ -33,11 +33,10 @@ Think about risk. Is the feature or function part of a critical area such as per ## Smoke Test - -1. Go to the homepage on https://dataverse-internal.iq.harvard.edu. Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. -2. Create a new user: I use a formulaic name with my initials and date and make the username and password the same, eg. kc080622. -3. Create a dataverse: I use the same username -4. Create a dataset: I use the same username; I fill in the required fields (I do not use a template). -5. Upload 3 different types of files: I use a tabular file, 50by1000.dta, an image file, and a text file. -6. Publish the dataset. -7. Download a file. +1. Go to the homepage on . Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. +1. Create a new user: It's fine to use a formulaic name with your initials and date and make the username and password the same, eg. kc080622. +1. Create a dataverse: You can use the same username. +1. Create a dataset: You can use the same username; fill in the required fields (do not use a template). +1. 
Upload 3 different types of files: You can use a tabular file, 50by1000.dta, an image file, and a text file. +1. Publish the dataset. +1. Download a file. diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/other-approaches.md index b50d9d0cf11..cf679c3f442 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,125 +1,120 @@ -# Other approaches to deploying and testing +# Other Approaches to Deploying and Testing ```{contents} :depth: 3 ``` -This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. I’m assuming if a developer is testing, they would likely just deploy to their dev environment. That might be ok but not sure the env is fully configured enough to offer a real-world testing scenario. An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. +This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. -## Tips and tricks +## Tips and Tricks - -- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (I have one on Google Drive). This potentially will help with future testing. -- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and tail -F server.log. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. 
-- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. -- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. +- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. +- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. +- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. +- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. +- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. ## Workflow for Completing QA on a PR +1. Assign the PR you are working on to yourself. -1. Assign the PR you are working on to yourself. - -2. What does it do? +1. What does it do? Read the description at the top of the PR, any release notes, documentation, and the original issue. -3. Does it address the issue it closes? +1. Does it address the issue it closes? The PR should address the issue entirely unless otherwise noted. -4. How do you test it? +1. How do you test it? - Look at the “how to test section†at the top of the pull request. Does it make sense? This likely won’t be the only testing you perform. You can develop further tests from the original issue or problem description, from the description of functionality, the documentation, configuration, and release notes. Also consider trying to reveal bugs by trying to break it: try bad or missing data, very large values or volume of data, exceed any place that may have a limit or boundary. + Look at the “how to test" section at the top of the pull request. Does it make sense? This likely won’t be the only testing you perform. You can develop further tests from the original issue or problem description, from the description of functionality, the documentation, configuration, and release notes. 
Also consider trying to reveal bugs by trying to break it: try bad or missing data, very large values or volume of data, exceed any place that may have a limit or boundary. -5. Does it have or need documentation? +1. Does it have or need documentation? - Small changes or fixes usually don’t have doc but new features or extensions of a feature or new configuration options should have documentation. + Small changes or fixes usually don’t have docs but new features or extensions of a feature or new configuration options should have documentation. -6. Does it have or need release notes? +1. Does it have or need release notes? Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. -7. Does it use a DB, flyway script? +1. Does it use a DB, Flyway script? Good to know since it may collide with another existing one by version or it could be a one way transform of your DB so back up your test DB before. Also, happens during deployment so be on the lookout for any issues. -8. Validate the documentation. +1. Validate the documentation. Build the doc using Jenkins, does it build without errors? Read it through for sense. Use it for test cases and to understand the feature. -9. Build and deploy the pull request. +1. Build and deploy the pull request. Normally this is done using Jenkins and automatically deployed to the QA test machine. -10. Configure if required +1. Configure if required If needed to operate and everyone installing or upgrading will use this, configure now as all testing will use it. -11. Smoke test the branch. +1. Smoke test the branch. Standard, minimal test of core functionality. -12. Regression test-related or potentially affected features +1. Regression test-related or potentially affected features If config is optional and testing without config turned on, do some spot checks/ regression tests of related or potentially affected areas. -13. Configure if optional +1. Configure if optional What is the default, enabled or disabled? Is that clearly indicated? Test both. By config here we mean enabling the functionality versus choosing a particular config option. Some complex features have config options in addition to enabling. Those will also need to be tested. -14. Test all the new or changed functionality. +1. Test all the new or changed functionality. The heart of the PR, what is this PR adding or fixing? Is it all there and working? -15. Regression test related or potentially affected features. +1. Regression test related or potentially affected features. - Sometimes new stuff modifies and extends other functionality or functionality that is shared with other aspects of the system, e.g. Export, Import. Check the underlying functionality that was also modified but in a spot check or briefer manner. + Sometimes new stuff modifies and extends other functionality or functionality that is shared with other aspects of the system, e.g. export, import. Check the underlying functionality that was also modified but in a spot check or briefer manner. -16. Report any issues found within the PR +1. Report any issues found within the PR It can be easy to lose track of what you’ve found, steps to reproduce, and any errors or stack traces from the server log. Add these in a numbered list to a comment in the pr. Easier to check off when fixed and to work on. Add large amounts of text as in the server log as attached, meaningfully named files. -17. Retest all fixes, spot check feature functionality, smoke test +1. 
Retest all fixes, spot check feature functionality, smoke test Similar to your initial testing, it is only narrower. -18. Test Upgrade Instructions, if required +1. Test upgrade instructions, if required Some features build upon the existing architecture but require modifications, such as adding a new column to the DB or changing or adding data. It is crucial that this works properly for our 100+ installations. This testing should be performed at the least on the prior version with basic data objects (collection, dataset, files) and any other data that will be updated by this feature. Using the sample data from the prior version would be good or deploying to dataverse-internal and upgrading there would be a good test. Remember to back up your DB before doing a transformative upgrade so that you can repeat it later if you find a bug. -19. Make sure the integration tests in the PR have been completed and passed. - +1. Make sure the API tests in the PR have been completed and passed. + They are run with each commit to the PR and take approximately 42 minutes to run. -20. Merge PR +1. Merge PR Click merge to include this PR into the common develop branch. -21. Delete merged branch +1. Delete merged branch Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. ## Checklist for Completing QA on a PR - 1. Build the docs -2. Smoke test the pr -3. Test the new functionality -4. Regression test -5. Test any upgrade instructions +1. Smoke test the pr +1. Test the new functionality +1. Regression test +1. Test any upgrade instructions ## Checklist for QA on Release - -1. Review Consolidated Release Notes, in particular upgrade instructions. -2. Conduct performance testing and compare with the previous release. -3. Perform clean install and smoke test. -4. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking - +1. Review Consolidated Release Notes, in particular upgrade instructions. +1. Conduct performance testing and compare with the previous release. +1. Perform clean install and smoke test. +1. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 51b38ee0921..d3364fbbbf9 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -6,11 +6,11 @@ ## Introduction -This document describes the testing process used by QA at IQSS and provides a guide for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. +This guide describes the testing process used by QA at IQSS and provides a reference for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. ## Workflow -The basic workflow is bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub Kanban board. 
As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common develop branch and ultimately released as part of the product. Before a pull request is merged it must be reviewed by a member of the development team from a coding perspective, it must pass automated integration tests before moving to QA. There it is tested manually, exercising the UI using three common browser types and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions are tested. Once this passes and any bugs that are found are corrected, the automated integration tests are confirmed to be passing, the PR is merged into development, the PR is closed, and the branch is deleted. At this point, the pr moves from the QA column automatically into the Done column and the process repeats with the next pr until it is decided to make a release. +The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. ## Release Cadence and Sprints @@ -20,13 +20,10 @@ The decision to make a release can be based on the time since the last release, ## Performance Testing and Deployment -The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named Locust, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. 
Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds (I believe), it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. +The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. -Once the performance has been tested and recorded in a Google spreadsheet for this proposed version, the release will be prepared and posted. +Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. -Preparing the release consists of writing and reviewing the release notes compiled from individual notes in PRs that have been merged for this release. A PR is made for the notes and merged. Next, increment the version numbers in certain code files, produce a PR with those changes, and merge that into the common development branch. Last, a PR is made to merge and develop into the master branch. Once that is merged a guide build with the new release version is made from the master branch. Last, a release war file is built from the master and an installer is built from the master branch and includes the newly built war file. - -Publishing the release consists of creating a new draft release on GitHub, posting the release notes, uploading the .war file and the installer .zip file, and any ancillary files used to configure this release. The latest link for the guides should be updated on the guides server to point to the newest version. Once that is all in place, specify the version name and the master branch at the top of the GitHub draft release and publish. This will tag the master branch with the version number and make the release notes and files available to the public. - -Once released, post to Dataverse general about the release and when possible, deploy to demo and production. +## Making a Release +See {doc}`/developers/making-releases` in the Developer Guide. 
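To make the load profile described above concrete, here is a minimal Locust sketch; it is not the actual script used for these runs, and the host, the dataset PIDs, and the exact parameters are assumptions/placeholders to be adjusted to the instance under test:

```python
import random

from locust import HttpUser, between, task

# Hypothetical PIDs; in practice use ~10 published datasets with varying file counts.
DATASET_PIDS = [
    "doi:10.5072/FK2/EXAMPLE1",
    "doi:10.5072/FK2/EXAMPLE2",
]


class GuestUser(HttpUser):
    # Assumption: pointing at a test instance, not production.
    host = "https://dataverse-internal.iq.harvard.edu"
    # Random think time between requests, in seconds (the 5-20 seconds mentioned above).
    wait_time = between(5, 20)

    @task(1)
    def homepage(self):
        # ~50% of requests hit the homepage.
        self.client.get("/")

    @task(1)
    def dataset_page(self):
        # ~50% of requests hit a dataset page, chosen at random from the pool.
        pid = random.choice(DATASET_PIDS)
        self.client.get(
            "/dataset.xhtml",
            params={"persistentId": pid},
            name="/dataset.xhtml?persistentId=[pid]",
        )
```

A headless run such as `locust -f locustfile.py --headless -u 250 -r 5` would ramp up toward the 250-user level mentioned above.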
diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index 7075d7f1776..a5981dcfbe9 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -10,7 +10,7 @@ To run performance tests, we have a performance test cluster on AWS that employs ## Access -Access to performance cluster instances requires ssh keys, see Leonid. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. +Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. ## Special Notes âš ï¸ @@ -19,6 +19,4 @@ Please note the performance database is also used occasionally by Julian and the Executing the Performance Script -------------------------------- -To execute the performance test script, you need to install a local copy of the database-helper-scripts project (https://github.com/IQSS/dataverse-helper-scripts), written by Raman. I have since produced a stripped-down script that calls just the DB and ds and works with python3. - -The automated integration test runs happen on each commit to a PR on an AWS instance and should be reviewed to be passing before merging into development. Their status can be seen on the PR page near the bottom, above the merge button. See Don Sizemore or Phil for questions. +To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the DB and ds and works with python3. diff --git a/doc/sphinx-guides/source/qa/test-automation-integration.md b/doc/sphinx-guides/source/qa/test-automation-integration.md deleted file mode 100644 index 5e9d00cd461..00000000000 --- a/doc/sphinx-guides/source/qa/test-automation-integration.md +++ /dev/null @@ -1,35 +0,0 @@ -# Test automation and integration test - -```{contents} -:depth: 3 -``` - -This test suite is added to and maintained by development. It is generally advisable for code contributors to add integration tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base’s code paths as possible, every time to catch bugs. - -This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. - -## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: - - -1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. - -2. From the pull request page, click the copy icon next to the pull request branch name. - -3. Log on to jenkins.dataverse.org, select the IQSS_Dataverse_Internal project, and configure the repository URL and branch specifier to match the ones from the pull request. 
For example: - - * 8372-gdcc-xoai-library has IQSS implied - - **Repository URL:** https://github.com/IQSS/dataverse.git - - **Branch specifier:** */8372-gdcc-xoai-library - * GlobalDataverseCommunityConsortium:GDCC/DC-3B - - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - - **Branch specifier:** */GDCC/DC-3B. - -4. Click Build Now and note the build number in progress. - -5. Once complete, go to https://dataverse-internal.iq.harvard.edu and check that the deployment succeeded, and that the homepage displays the latest build number. - -6. If for some reason it didn’t deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (su - dataverse, /usr/local/payara5/bin/asadmin list-applications, /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1, /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war) - -7. If that didn’t work, you may have run into a flyway DB script collision error but that should be indicated by the server.log - -8. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to tail -F server.log in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md new file mode 100644 index 00000000000..ba8e5296d47 --- /dev/null +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -0,0 +1,35 @@ +# Test Automation + +```{contents} +:depth: 3 +``` + +The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. + +This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. + +## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal + + +1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. + +1. From the pull request page, click the copy icon next to the pull request branch name. + +1. Log on to <https://jenkins.dataverse.org>, select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: + + * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. + +1. Click "Build Now" and note the build number in progress. + +1. Once complete, go to <https://dataverse-internal.iq.harvard.edu> and check that the deployment succeeded, and that the homepage displays the latest build number. + +1. If for some reason it didn’t deploy, check the server.log file.
It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara5/bin/asadmin list-applications; /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1; /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war`) + +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. + +1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index fb66bc4d099..45b3b360ac7 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -6,10 +6,14 @@ ## Dataverse Internal -To build and test a PR, we use a build named IQSS_Dataverse_Internal on jenkins.dataverse.org, which deploys the .war file to an AWS instance named dataverse-internal.iq.harvard.edu. -Login to Jenkins requires a username and password. Check with Don Sizemore. Login to the dataverse-internal server requires a key, see Leonid. +To build and test a PR, we use a build named `IQSS_Dataverse_Internal` on <https://jenkins.dataverse.org>, which deploys the .war file to an AWS instance named <https://dataverse-internal.iq.harvard.edu>. ## Guides Server -There is also a guides build project named guides.dataverse.org. Any test builds of guides are deployed to a named directory** on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. -Login to the guides server requires a key, see Don Sizemore. +There is also a guides build project named `guides.dataverse.org`. Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. + +Note that changes to guides can also be previewed on Read the Docs. In the pull request, look for a link like . This Read the Docs preview is also mentioned under {doc}`/developers/documentation`. + +## Other Servers + +We can spin up additional AWS EC2 instances as needed. See {doc}`/developers/deployment` in the Developer Guide.
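As a scripted alternative to eyeballing the homepage for the "check that the deployment succeeded" step above, here is a minimal sketch that asks the instance for its version and build via the standard Dataverse `/api/info/version` endpoint; the host below is an assumption and should point at whatever instance Jenkins deployed to:

```python
import requests

# Assumption: the instance Jenkins just deployed to.
HOST = "https://dataverse-internal.iq.harvard.edu"

# /api/info/version is the standard Dataverse info API for the running version/build.
resp = requests.get(f"{HOST}/api/info/version", timeout=30)
resp.raise_for_status()
data = resp.json()["data"]
# The "build" field may or may not be populated, depending on how the war was built.
print("Deployed version:", data.get("version"), data.get("build", ""))
```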
From 7650eb308ed5cb8805981e77b252ceb2e3c760c2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 Nov 2023 16:35:25 -0500 Subject: [PATCH 164/546] Removes the title from content and add label --- doc/sphinx-guides/source/qa/manual-testing.md | 3 ++- doc/sphinx-guides/source/qa/other-approaches.md | 3 ++- doc/sphinx-guides/source/qa/overview.md | 3 ++- doc/sphinx-guides/source/qa/performance-tests.md | 3 ++- doc/sphinx-guides/source/qa/test-automation.md | 3 ++- doc/sphinx-guides/source/qa/testing-infrastructure.md | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/manual-testing.md index 9f365aae59f..580e5153394 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -1,6 +1,7 @@ # Manual Testing Approach -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` ## Introduction diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/other-approaches.md index cf679c3f442..2e2ef906191 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,6 +1,7 @@ # Other Approaches to Deploying and Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index d3364fbbbf9..c4f66446ca3 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -1,6 +1,7 @@ # Overview -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index a5981dcfbe9..f433226d4ff 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -1,6 +1,7 @@ # Performance Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index ba8e5296d47..c2b649df498 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -1,6 +1,7 @@ # Test Automation -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index 45b3b360ac7..7a4bda626fc 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -1,6 +1,7 @@ # Infrastructure for Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` From 75789e0f94d36fce1270b0714bd5e516f356d8ee Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 13 Nov 2023 19:06:26 -0500 Subject: [PATCH 165/546] current state of the flyway script (work in progress/likely to change) #8549 --- .../V6.0.0.3__8549-collection-quotas.sql | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql diff --git a/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql new file mode 100644 index 00000000000..f74d9bebe30 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql @@ -0,0 +1,70 @@ +-- Storage size column added: +ALTER TABLE dvobject 
ADD COLUMN IF NOT EXISTS storagesize BIGINT; + +-- (work in progress! the table structure may change/the column may be moved out into +-- its own table. but the mechanics of the recursion are working) + +-- The somewhat convoluted queries below populate the storage sizes for the entire +-- DvObject tree, fast. It IS possible to do it all with one recursive PostgreSQL +-- query that will crawl the tree from the leaves (DataFiles) up and add up the +-- sizes for all the Datasets/Collections above. Unfortunately, that takes some hours +-- on a database the size of the one at IQSS. So what we are doing instead is to compute +-- the total sizes of all the *directly* linked objects, with 3 linear queries. This +-- will correctly calculate the sizes of all the Datasets (since they can only +-- contain DataFiles, directly, without any extra hierarchy possible) and those +-- Collections that only contain Datasets; but not the sizes of Collections that +-- have sub-collections. To take any sub-collections into account we are then running +-- a recursive query - but then we only need to run it on the tree of Collections, +-- which should make it manageably fast on any real life instance. + +UPDATE dvobject SET storagesize=0; +-- For datafiles, the storage size = main file size by default: +-- (we are excluding any harvested files) +UPDATE dvobject SET storagesize=COALESCE(f.filesize,0) FROM datafile f, dataset d WHERE f.id = dvobject.id AND dvobject.owner_id = d.id AND d.harvestingclient_id IS null; +-- ... but for ingested tabular files the size of the saved original needs to be added, since +-- those also take space: +-- (should be safe to assume that there are no *harvested ingested* files) +UPDATE dvobject SET storagesize=dvobject.storagesize + COALESCE(datatable.originalFileSize,0) FROM datatable WHERE datatable.datafile_id = dvobject.id; +-- Now we can calculate storage sizes of each individual dataset (a simple sum +-- of the storage sizes of all the files in the dataset): +-- (excluding the harvested datasets; this is less important, since there should be +-- significantly fewer datasets than files, but might as well) +UPDATE dvobject SET storagesize=o.combinedStorageSize +FROM (SELECT datasetobject.id, SUM(fileobject.storagesize) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject +WHERE fileobject.owner_id = datasetobject.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; +-- ...
and then we can repeat the same for collections, by setting the storage size +-- to the sum of the storage sizes of the datasets *directly* in each collection: +-- (no attempt is made yet to recursively count the sizes of all the child sub-collections) +UPDATE dvobject SET storagesize=o.combinedStorageSize +FROM (SELECT collectionobject.id, SUM(datasetobject.storagesize) AS combinedStorageSize +FROM dvobject datasetobject, dvobject collectionobject +WHERE datasetobject.owner_id = collectionobject.id +AND datasetobject.storagesize IS NOT null +GROUP BY collectionobject.id) o WHERE o.id = dvobject.id AND dvobject.dtype='Dataverse'; + +-- And now we will update the storage sizes of all the Collection ("Dataverse") objects +-- that contain sub-collections, *recursively*, to add their sizes to the totals: +WITH RECURSIVE treestorage (id, owner_id, storagesize, dtype) AS +( + -- All dataverses: + SELECT id, owner_id, storagesize, dtype + FROM dvobject + WHERE dtype = 'Dataverse' + + UNION + + -- Recursive Member: + SELECT dvobject.id, treestorage.owner_id, dvobject.storagesize, treestorage.dtype + FROM treestorage, dvobject + WHERE treestorage.id = dvobject.owner_id + AND dvobject.dtype = 'Dataverse' +) + +UPDATE dvobject SET storagesize=storagesize+(SELECT COALESCE(SUM(storagesize),0) +FROM treestorage WHERE owner_id=dvobject.id) +--FROM treestorage ts +--WHERE ts.owner_id=dvobject.id +WHERE dvobject.dtype = 'Dataverse' +AND dvobject.id IN (SELECT owner_id FROM treestorage WHERE owner_id IS NOT null); From c49036bf3d67d22cec384a8fe4f7cb23ed3d9a46 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:06:43 +0000 Subject: [PATCH 166/546] Added: includeDeaccessioned support to getDatasetVersionCitation API endpoint --- .../harvard/iq/dataverse/api/Datasets.java | 9 ++++++-- .../harvard/iq/dataverse/api/DatasetsIT.java | 21 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 3 ++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 292aba0cee3..68c618b0f1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3995,9 +3995,14 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken" @GET @AuthRequired @Path("{id}/versions/{versionId}/citation") - public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { return response(req -> ok( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned).getCitation(true, false)), getRequestUser(crc)); } @POST diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 56bf53c1c99..d20f1e8a58b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++
b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3371,13 +3371,32 @@ public void getDatasetVersionCitation() { createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, apiToken); + Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, false, apiToken); getDatasetVersionCitationResponse.prettyPrint(); getDatasetVersionCitationResponse.then().assertThat() .statusCode(OK.getStatusCode()) // We check that the returned message contains information expected for the citation string .body("data.message", containsString("DRAFT VERSION")); + + // Test Deaccessioned + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // includeDeaccessioned false + Response getDatasetVersionCitationNotDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiToken); + getDatasetVersionCitationNotDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // includeDeaccessioned true + Response getDatasetVersionCitationDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken); + getDatasetVersionCitationDeaccessioned.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", containsString("DEACCESSIONED VERSION")); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..2336bf8beb8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3345,10 +3345,11 @@ static Response getPrivateUrlDatasetVersionCitation(String privateUrlToken) { return response; } - static Response getDatasetVersionCitation(Integer datasetId, String version, String apiToken) { + static Response getDatasetVersionCitation(Integer datasetId, String version, boolean includeDeaccessioned, String apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json") + .queryParam("includeDeaccessioned", includeDeaccessioned) .get("/api/datasets/" + datasetId + "/versions/" + version + "/citation"); return response; } From 75ff2fbad275a4543525ac0dc62f65d3eaa0e5c1 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:10:14 +0000 Subject: [PATCH 167/546] Added: API docs for #10104 --- doc/sphinx-guides/source/api/native-api.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..2e3a0b2af08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2502,6 +2502,16 @@ Get Citation curl -H "Accept:application/json" 
"$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER" +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true" + Get Citation by Private URL Token ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From be631af6e5fd5dd181aebdb0ee8a2dd1da3ff789 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:12:31 +0000 Subject: [PATCH 168/546] Added: release notes for #10104 --- doc/release-notes/10104-dataset-citation-deaccessioned.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md new file mode 100644 index 00000000000..0ba06d729c4 --- /dev/null +++ b/doc/release-notes/10104-dataset-citation-deaccessioned.md @@ -0,0 +1 @@ +The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. From 2fb81f6b5e1a5c735b937600b0dd74ee47d236a1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 15 Nov 2023 10:01:52 -0500 Subject: [PATCH 169/546] altering circuit breakers for qa --- conf/solr/9.3.0/solrconfig.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml index b89315cdaa9..9705faa7009 100644 --- a/conf/solr/9.3.0/solrconfig.xml +++ b/conf/solr/9.3.0/solrconfig.xml @@ -588,10 +588,10 @@ check for "Circuit Breakers tripped" in logs and the corresponding error message should tell you what transpired (if the failure was caused by tripped circuit breakers). 
--> - + 5 + - + 5 + - + + - + + From 74d36b64d0fc36afafa5382952050239737ebe1a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 16 Nov 2023 11:24:30 -0500 Subject: [PATCH 171/546] #9686 preliminary check in --- .../java/edu/harvard/iq/dataverse/Dataset.java | 14 +------------- .../java/edu/harvard/iq/dataverse/DvObject.java | 17 +++++++++++++++++ .../V6.0.0.3__9686-move-harvestingclient-id.sql | 8 ++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 245bdf0efd2..ad72ada20e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -742,21 +742,9 @@ public void setDatasetExternalCitations(List datasetEx this.datasetExternalCitations = datasetExternalCitations; } - @ManyToOne - @JoinColumn(name="harvestingClient_id") - private HarvestingClient harvestedFrom; - - public HarvestingClient getHarvestedFrom() { - return this.harvestedFrom; - } - public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { - this.harvestedFrom = harvestingClientConfig; - } - public boolean isHarvested() { - return this.harvestedFrom != null; - } + private String harvestIdentifier; diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..16237203d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import java.sql.Timestamp; @@ -351,6 +352,22 @@ public GlobalId getGlobalId() { return globalId; } + @ManyToOne + @JoinColumn(name="harvestingClient_id") + private HarvestingClient harvestedFrom; + + public HarvestingClient getHarvestedFrom() { + return this.harvestedFrom; + } + + public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { + this.harvestedFrom = harvestingClientConfig; + } + + public boolean isHarvested() { + return this.harvestedFrom != null; + } + public abstract T accept(Visitor v); @Override diff --git a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql new file mode 100644 index 00000000000..23d66701b99 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql @@ -0,0 +1,8 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; + +update dvobject dvo set harvestingclient_id = s.harvestingclient_id from +(select id, harvestingclient_id from dataset d) s +where s.id = dvo.id; + +--ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; + From 5c045120d6660ee0b07501cadfb06aaf9f083f6b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 16 Nov 2023 13:42:51 -0500 Subject: [PATCH 172/546] #9686 rename migration script --- ...lient-id.sql => V6.0.0.4__9686-move-harvestingclient-id.sql} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/main/resources/db/migration/{V6.0.0.3__9686-move-harvestingclient-id.sql => V6.0.0.4__9686-move-harvestingclient-id.sql} (72%) diff --git 
a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql similarity index 72% rename from src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql index 23d66701b99..0e4c9a58a93 100644 --- a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql +++ b/src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql @@ -1,7 +1,7 @@ ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d) s +(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s where s.id = dvo.id; --ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; From a376b4e3f4bacc8dc651b7048d9a323535dc92f7 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 10:01:33 -0500 Subject: [PATCH 173/546] Add condition for 401 when a invalid key is provided and create changelog on API Guide --- doc/sphinx-guides/source/api/changelog.rst | 13 +++++++++++++ doc/sphinx-guides/source/api/index.rst | 1 + .../java/edu/harvard/iq/dataverse/api/AccessIT.java | 11 ++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 doc/sphinx-guides/source/api/changelog.rst diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst new file mode 100644 index 00000000000..b78d268db33 --- /dev/null +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -0,0 +1,13 @@ +API Changelog +============= + +.. contents:: |toctitle| + :local: + :depth: 1 + +6.0.0 +----- + +Changes +~~~~~~~ + - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. diff --git a/doc/sphinx-guides/source/api/index.rst b/doc/sphinx-guides/source/api/index.rst index c9e79098546..dd195aa9d62 100755 --- a/doc/sphinx-guides/source/api/index.rst +++ b/doc/sphinx-guides/source/api/index.rst @@ -24,3 +24,4 @@ API Guide linkeddatanotification apps faq + changelog \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 42e21e53101..d08f916243f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -198,6 +198,8 @@ public void testDownloadSingleFile() { //Not logged in non-restricted Response anonDownloadOriginal = UtilIT.downloadFileOriginal(tabFile1Id); Response anonDownloadConverted = UtilIT.downloadFile(tabFile1Id); + Response anonDownloadConvertedNullKey = UtilIT.downloadFile(tabFile1Id, null); + // ... and download the same tabular data file, but without the variable name header added: Response anonDownloadTabularNoHeader = UtilIT.downloadTabularFileNoVarHeader(tabFile1Id); // ... 
and download the same tabular file, this time requesting the "format=tab" explicitly: @@ -206,6 +208,8 @@ public void testDownloadSingleFile() { assertEquals(OK.getStatusCode(), anonDownloadConverted.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularNoHeader.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularWithFormatName.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), anonDownloadConvertedNullKey.getStatusCode()); + int origSizeAnon = anonDownloadOriginal.getBody().asByteArray().length; int convertSizeAnon = anonDownloadConverted.getBody().asByteArray().length; int tabularSizeNoVarHeader = anonDownloadTabularNoHeader.getBody().asByteArray().length; @@ -423,10 +427,7 @@ private HashMap readZipResponse(InputStream iStrea } String name = entry.getName(); -// String s = String.format("Entry: %s len %d added %TD", -// entry.getName(), entry.getSize(), -// new Date(entry.getTime())); -// System.out.println(s); + // Once we get the entry from the zStream, the zStream is // positioned read to read the raw data, and we keep @@ -466,7 +467,7 @@ private HashMap readZipResponse(InputStream iStrea @Test public void testRequestAccess() throws InterruptedException { - + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); createDatasetResponse.prettyPrint(); From 63725d75c115352ff9d0bb94f2e5b6b4d7ca5d05 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:07:17 -0500 Subject: [PATCH 174/546] remove cruft: mdc logs #9115 --- mdc-logs/raw-mdc-2019-01-07.log | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 mdc-logs/raw-mdc-2019-01-07.log diff --git a/mdc-logs/raw-mdc-2019-01-07.log b/mdc-logs/raw-mdc-2019-01-07.log deleted file mode 100644 index d7a6386160e..00000000000 --- a/mdc-logs/raw-mdc-2019-01-07.log +++ /dev/null @@ -1,6 +0,0 @@ -#Fields: event_time client_ip session_cookie_id user_cookie_id user_id request_url identifier filename size user-agent title publisher publisher_id authors publication_date version other_id target_url publication_year -2019-01-07T15:14:51-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:15:15-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:04-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:14-0500 0:0:0:0:0:0:0:1 
9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bae7c-2c5bbc1a9c8c 1 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:19-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bb8ce-337d8df49763 4026 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 From 2433114ec7b8430753bc730056a07e24ac0bb5d3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:20:03 -0500 Subject: [PATCH 175/546] fix bullet #10060 #10070 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index b78d268db33..a1cffd84f33 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ - - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. From e0350e735551270f9bd23bfa226b6946282df467 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 11:38:53 -0500 Subject: [PATCH 176/546] Change 6.0.0 to 6.0 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index a1cffd84f33..086ff4a20e5 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,7 +5,7 @@ API Changelog :local: :depth: 1 -6.0.0 +6.0 ----- Changes From 437e3b94edf89a2245310709c07d8238c0df4235 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 11:42:17 -0500 Subject: [PATCH 177/546] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 086ff4a20e5..2698ba3debf 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. 
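To illustrate the behavior change described in this changelog entry, here is a small sketch using Python requests; the server URL and file id are placeholders, and the expected status codes follow the changelog wording and the AccessIT test above:

```python
import requests

SERVER_URL = "https://demo.dataverse.org"  # placeholder; any instance with this change
FILE_ID = 42                               # placeholder id of a published, public file

# No API token at all: the public (non-restricted) file still downloads.
no_token = requests.get(f"{SERVER_URL}/api/access/datafile/{FILE_ID}")

# Invalid API token: now rejected instead of being ignored.
bad_token = requests.get(
    f"{SERVER_URL}/api/access/datafile/{FILE_ID}",
    headers={"X-Dataverse-key": "not-a-real-token"},
)

print(no_token.status_code)   # expected: 200
print(bad_token.status_code)  # expected: 401
```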
From 640f69e39f71244b9ba1d7f534180a6b4c8b58cc Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 13:19:14 -0500 Subject: [PATCH 178/546] add release note for API changelog #10060 --- doc/release-notes/10060-api-changelog.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10060-api-changelog.md diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md new file mode 100644 index 00000000000..56ac96e3564 --- /dev/null +++ b/doc/release-notes/10060-api-changelog.md @@ -0,0 +1,3 @@ +We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html + +See also #10060. From 83a66aac65db2f7634b3917d332b0e4253be3c84 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:55:58 -0500 Subject: [PATCH 179/546] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 2698ba3debf..f518a9b542d 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. From 70edaa789e84c99b110036c232155337afb5c459 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 15:02:32 -0500 Subject: [PATCH 180/546] Remove "to happy " --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index f518a9b542d..d6742252d27 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. From 73593acb1bcdb9ba1d62e47310753e905b2546dd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 17 Nov 2023 15:17:28 -0500 Subject: [PATCH 181/546] #9464 query by dvo. 
update IT --- .../dataverse/metrics/MetricsServiceBean.java | 33 ++++++++++--------- .../harvard/iq/dataverse/api/MetricsIT.java | 14 +++++--- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..832dda5ced9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -212,8 +212,9 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" + - " and dataset.harvestingclient_id is null\n" + + " and dvobject.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; @@ -225,7 +226,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dataset.harvestingclient_id IS NOT null \n" + + " AND dvobject.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +245,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "JOIN dvobject ON dvobject.id = dataset.id\n") + + "JOIN dvobject ON dvobject.id = dataset.id\n" + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +259,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +277,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +305,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +328,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +351,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? 
"" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..b961a86dc0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -283,6 +283,12 @@ public void testGetDatasetsBySubject() { response = UtilIT.metricsDatasetsBySubject("dataLocation=local"); response.then().assertThat() .statusCode(OK.getStatusCode()); + + //Test ok when passing remote + response = UtilIT.metricsDatasetsBySubject("dataLocation=remote"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From d0fc9affdf52dfd60461520adb20a6c7d30e7d6b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Nov 2023 15:31:50 -0500 Subject: [PATCH 182/546] refactor to avoid overloaded methods in constructors --- .../AbstractRemoteOverlayAccessIO.java | 335 ++++++++++++++++++ .../dataaccess/GlobusAccessibleStore.java | 4 +- .../dataaccess/GlobusOverlayAccessIO.java | 51 ++- .../dataaccess/RemoteOverlayAccessIO.java | 315 +--------------- .../dataaccess/RemoteOverlayAccessIOTest.java | 1 - 5 files changed, 390 insertions(+), 316 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java new file mode 100644 index 00000000000..8adaf746210 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -0,0 +1,335 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Logger; + +import 
javax.net.ssl.SSLContext; + +import org.apache.http.Header; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.protocol.HTTP; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; + +public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { + + protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + static final String BASE_STORE = "base-store"; + protected static final String SECRET_KEY = "secret-key"; + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + protected static final String REMOTE_STORE_NAME = "remote-store-name"; + protected static final String REMOTE_STORE_URL = "remote-store-url"; + protected StorageIO baseStore = null; + protected String path = null; + protected PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; + + public static String getBaseStoreIdFor(String driverId) { + return getConfigParamForDriver(driverId, BASE_STORE); + } + + public AbstractRemoteOverlayAccessIO() { + super(); + } + + public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + } + + public AbstractRemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. + */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + protected int getUrlExpirationMinutes() { + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. 
+ connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20); + } + + @Override + abstract public long retrieveSizeFromMedia(); + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (retrieveSizeFromMedia() != -1); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + + } else if (storageLocation != null) { + // ://// + // remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if (index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? 
storageLocation.substring(0, endOfId) : storageLocation; + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = getConfigParam(REMOTE_STORE_NAME); + try { + remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); + } catch (MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + protected String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (getDvObject() instanceof Dataset) { + fullStoragePath = getDataset().getAuthorityForFileStorage() + "/" + + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (getDvObject() instanceof DataFile) { + fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index afc7556481a..ce75395c883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -28,7 +28,7 @@ public static String getTransferPath(String driverId) { } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i extends RemoteOverlayAccessIO implements GlobusAccessibleStore { +public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); /* @@ -67,11 +68,19 @@ public class 
GlobusOverlayAccessIO extends RemoteOverlayAcce public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); + configureGlobusEndpoints(); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureGlobusEndpoints(); configureStores(null, driverId, storageLocation); if (isManaged()) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); @@ -83,6 +92,7 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Referenced path: " + path); } } + private boolean isManaged() { if(dataverseManaged==null) { dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); @@ -146,7 +156,6 @@ private static String findMatchingEndpoint(String path, String[] allowedEndpoint return null; } - @Override protected void validatePath(String relPath) throws IOException { if (isManaged()) { if (!usesStandardNamePattern(relPath)) { @@ -363,8 +372,7 @@ public String getStorageLocation() throws IOException { * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - @Override - protected void configureEndpoints() throws IOException { + protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } @@ -435,5 +443,40 @@ public static void main(String[] args) { } } + + + @Override + public void open(DataAccessOption... 
option) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public Path getFileSystemPath() throws IOException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public void savePath(Path fileSystemPath) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + // TODO Auto-generated method stub + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 5463254140d..1616bfabf96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,45 +11,23 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; import java.util.List; -import java.util.function.Predicate; -import java.util.logging.Logger; import org.apache.http.Header; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.Registry; -import org.apache.http.config.RegistryBuilder; -import org.apache.http.conn.socket.ConnectionSocketFactory; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HTTP; -import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.net.ssl.SSLContext; - /** * @author qqmyers */ @@ -61,40 +39,20 @@ * * baseUrl: http(s):// */ -public class RemoteOverlayAccessIO extends StorageIO { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); +public class RemoteOverlayAccessIO extends AbstractRemoteOverlayAccessIO { // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; - // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. 
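    // Illustrative sketch only (the setting names below are an assumption, following the
    // dataverse.files.<id>.<key> pattern implied by getConfigParam()/getConfigParamForDriver()):
    // a remote overlay store registered under driver id "demo" would typically be configured with
    //   dataverse.files.demo.base-url=https://repository.example.edu
    //   dataverse.files.demo.base-store=file
    //   dataverse.files.demo.secret-key=<shared secret used for signed URLs>
    //   dataverse.files.demo.url-expiration-minutes=60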
- static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; - static final String BASE_STORE = "base-store"; - static final String SECRET_KEY = "secret-key"; - static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; - static final String REMOTE_STORE_NAME = "remote-store-name"; - static final String REMOTE_STORE_URL = "remote-store-url"; - - protected StorageIO baseStore = null; - protected String path = null; - private String baseUrl = null; - - protected static HttpClientContext localContext = HttpClientContext.create(); - protected PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - protected int timeout = 1200; - protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - protected static boolean trustCerts = false; - protected int httpConcurrency = 4; + String baseUrl = null; public RemoteOverlayAccessIO() { + super(); } public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); @@ -106,6 +64,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(null, driverId, storageLocation); path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); @@ -296,105 +255,12 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. 
- */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSizeFromMedia() != -1); - } - @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( @@ -407,11 +273,6 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - @Override public boolean downloadRedirectEnabled() { String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT); @@ -443,103 +304,12 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - - protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - configureEndpoints(); - - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); - } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case 
DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) - int endOfId = storageLocation.indexOf("//"); - fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); - } - } - remoteStoreName = getConfigParam(REMOTE_STORE_NAME); - try { - remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - protected void configureEndpoints() throws IOException { + protected void configureRemoteEndpoints() throws IOException { baseUrl = getConfigParam(BASE_URL); if (baseUrl == null) { //Will accept the first endpoint using the newer setting @@ -560,70 +330,6 @@ protected void configureEndpoints() throws IOException { } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. 
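    // Hypothetical example of the path assembly described above (identifiers are made up for
    // illustration): a DataFile in dataset doi:10.5072/FK2/ABCDEF stored as
    // "demo://18b39722140-50eb7d3c5ece//remote/path" resolves to
    // "10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece"; configureStores() then prepends the
    // base-store-specific prefix (S3 bucket or file-store directory) before calling
    // DataAccess.getDirectStorageIO().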
- protected String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( @@ -660,13 +366,4 @@ static boolean isValidIdentifier(String driverId, String storageId) { } return true; } - - public static String getBaseStoreIdFor(String driverId) { - return getConfigParamForDriver(driverId, BASE_STORE); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 5affc01aff0..1c371881ba6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -8,7 +8,6 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.mocks.MocksFactory; import edu.harvard.iq.dataverse.util.UrlSignerUtil; From 2500bccc5fa438bf2dff4e5aa887e816099a51e3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 14:04:22 -0500 Subject: [PATCH 183/546] assert current /bag-info.txt behavior #8760 Also, add a superuser-only API for downloading files (such as bags) from the file system so we can make assertions about them in our tests. --- .../iq/dataverse/api/AbstractApiBean.java | 7 ++ .../edu/harvard/iq/dataverse/api/Admin.java | 25 ++++- .../edu/harvard/iq/dataverse/api/BagIT.java | 101 +++++++++++++++++- 3 files changed, 128 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 027f9e0fcb1..58565bcc9d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -45,11 +45,13 @@ import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; +import java.io.InputStream; import java.net.URI; import java.util.Arrays; import java.util.Collections; @@ -726,6 +728,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename) return res.build(); } + protected Response ok(InputStream inputStream) { + ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT)); + return res.build(); + } + protected Response created( String uri, JsonObjectBuilder bld ) { return Response.created( URI.create(uri) ) .entity( Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..684ed32dff8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -107,6 +107,7 @@ import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -2425,5 +2426,27 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } - + + /** + * For testing only. Download a file from the file system. + */ + @GET + @AuthRequired + @Path("/localfile") + public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + try { + return ok(new FileInputStream(pathToFile)); + } catch (IOException ex) { + return error(Status.BAD_REQUEST, ex.toString()); + } + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index e7210bc45a9..fae9cf95156 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -1,17 +1,32 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.response.Response; import edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.response.Response; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.Enumeration; +import java.util.Scanner; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class BagIT { + static String bagitExportDir = "/tmp"; + @BeforeAll public static void setUpClass() { @@ -25,14 +40,14 @@ public static void setUpClass() { setArchiverSettings.then().assertThat() .statusCode(OK.getStatusCode()); - Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", "/tmp"); + Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", bagitExportDir); setBagItLocalPath.then().assertThat() .statusCode(OK.getStatusCode()); } @Test - public void testBagItExport() { + public void testBagItExport() throws IOException { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); @@ -63,6 +78,78 @@ public void testBagItExport() { archiveDataset.prettyPrint(); archiveDataset.then().assertThat().statusCode(OK.getStatusCode()); + // spaceName comes from LocalSubmitToArchiveCommand + String spaceName = datasetPid.replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + // spacename: doi-10-5072-fk2-fosg5q + + String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip"; + + try { + // give the bag time to generate + Thread.sleep(3000); + } catch (InterruptedException ex) { + } + + // A bag could look like this: + 
//doi-10-5072-FK2-DKUTDUv-1-0/data/ + //doi-10-5072-FK2-DKUTDUv-1-0/data/Darwin's Finches/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/pid-mapping.txt + //doi-10-5072-FK2-DKUTDUv-1-0/manifest-md5.txt + //doi-10-5072-FK2-DKUTDUv-1-0/bagit.txt + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/oai-ore.jsonld + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/datacite.xml + //doi-10-5072-FK2-DKUTDUv-1-0/bag-info.txt + // --- + // bag-info.txt could look like this: + //Contact-Name: Finch, Fiona + //Contact-Email: finch@mailinator.com + //Source-Organization: Dataverse Installation () + //Organization-Address: + //Organization-Email: + //External-Description: Darwin's finches (also known as the Galápagos finches) are a group of about + // fifteen species of passerine birds. + //Bagging-Date: 2023-11-14 + //External-Identifier: https://doi.org/10.5072/FK2/LZIGBC + //Bag-Size: 0 bytes + //Payload-Oxum: 0.0 + //Internal-Sender-Identifier: Root:Darwin's Finches + Response downloadBag = downloadLocalFile(pathToZip, apiToken); + downloadBag.then().assertThat().statusCode(OK.getStatusCode()); + Path outputPath = Paths.get("/tmp/foo.zip"); + java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); + + ZipFile zipFile = new ZipFile(outputPath.toString()); + Enumeration entries = zipFile.entries(); + String sourceOrg = null; + String orgAddress = null; + String orgEmail = null; + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + String name = entry.getName(); + System.out.println("name: " + name); + if (name.endsWith("bag-info.txt")) { + InputStream stream = zipFile.getInputStream(entry); + Scanner s = new Scanner(stream).useDelimiter("\\A"); + String result = s.hasNext() ? 
s.next() : ""; + System.out.println("result: " + result); + String[] lines = result.split("\n"); + for (String line : lines) { + if (line.startsWith("Source-Organization")) { + sourceOrg = line; + } else if (line.startsWith("Organization-Address")) { + orgAddress = line; + } else if (line.startsWith("Organization-Email")) { + orgEmail = line; + } else { + } + } + } + } + assertEquals("Source-Organization: Dataverse Installation ()", sourceOrg.trim()); + assertEquals("Organization-Address: ", orgAddress.trim()); + assertEquals("Organization-Email: ", orgEmail.trim()); } @AfterAll @@ -75,4 +162,10 @@ public static void tearDownClass() { } + static Response downloadLocalFile(String pathToFile, String apiToken) { + return given() + .header("X-Dataverse-key", apiToken) + .get("/api/admin/localfile?pathToFile=" + pathToFile); + } + } From 7240e870d35fda4ec96a4ee0e0b488a9c4fc3d4f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 16:03:15 -0500 Subject: [PATCH 184/546] configurable BagIt source org name, address, email #8760 These values were used while testing: DATAVERSE_BAGIT_SOURCEORG_NAME=LibraScholar DATAVERSE_BAGIT_SOURCEORG_ADDRESS=123 Wisdom Way\nCambridge, MA\nUSA DATAVERSE_BAGIT_SOURCEORG_EMAIL=hello@dataverse.librascholar.edu --- .../iq/dataverse/settings/JvmSettings.java | 7 +++++++ .../iq/dataverse/util/bagit/BagGenerator.java | 15 ++++++++++----- src/main/java/propertyFiles/Bundle.properties | 4 ---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..2f59350906c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,13 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // BAGIT SETTINGS + SCOPE_BAGIT(PREFIX, "bagit"), + SCOPE_BAGIT_SOURCEORG(SCOPE_BAGIT, "sourceorg"), + BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), + BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), + BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index baba1a0cb43..b7c44014b80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -74,7 +74,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import java.util.Optional; public class BagGenerator { @@ -822,17 +824,20 @@ private String generateInfoFile() { logger.warning("No contact info available for BagIt Info file"); } - info.append("Source-Organization: " + BundleUtil.getStringFromBundle("bagit.sourceOrganization")); + String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation ()"); + String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse(""); + String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse(""); + + 
info.append("Source-Organization: " + orgName); // ToDo - make configurable info.append(CRLF); - info.append("Organization-Address: " + WordUtils.wrap( - BundleUtil.getStringFromBundle("bagit.sourceOrganizationAddress"), 78, CRLF + " ", true)); + info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true)); + info.append(CRLF); // Not a BagIt standard name - info.append( - "Organization-Email: " + BundleUtil.getStringFromBundle("bagit.sourceOrganizationEmail")); + info.append("Organization-Email: " + orgEmail); info.append(CRLF); info.append("External-Description: "); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 79887f7e76c..972e5e35601 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2379,10 +2379,6 @@ api.prov.error.freeformMissingJsonKey=The JSON object you send must have a key c api.prov.error.freeformNoText=No provenance free form text available for this file. api.prov.error.noDataFileFound=Could not find a file based on ID. -bagit.sourceOrganization=Dataverse Installation () -bagit.sourceOrganizationAddress= -bagit.sourceOrganizationEmail= - bagit.checksum.validation.error=Invalid checksum for file "{0}". Manifest checksum={2}, calculated checksum={3}, type={1} bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" From b2c62510e71e6436c2905796b9cc6a24a04b35d0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 14:06:06 -0500 Subject: [PATCH 185/546] add docs and release note for bag-info.txt config #8760 --- doc/release-notes/8760-bagit.md | 15 ++++++ .../source/installation/config.rst | 46 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md new file mode 100644 index 00000000000..30601857309 --- /dev/null +++ b/doc/release-notes/8760-bagit.md @@ -0,0 +1,15 @@ +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. + +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 13a7367de44..df311fcdaca 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1608,6 +1608,25 @@ The workflow id returned in this call (or available by doing a GET of /api/admin Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. 
if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself. +.. _bag-info.txt: + +Configuring bag-info.txt +++++++++++++++++++++++++ + +Out of the box, placeholder values like below will be placed in bag-info.txt: + +.. code-block:: text + + Source-Organization: Dataverse Installation () + Organization-Address: + Organization-Email: + +To customize these values for your institution, use the following JVM options: + +- :ref:`dataverse.bagit.sourceorg.name` +- :ref:`dataverse.bagit.sourceorg.address` +- :ref:`dataverse.bagit.sourceorg.email` + Going Live: Launching Your Production Deployment ------------------------------------------------ @@ -2506,6 +2525,33 @@ See also :ref:`guestbook-at-request-api` in the API Guide, and . Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_GUESTBOOK_AT_REQUEST``. +.. _dataverse.bagit.sourceorg.name: + +dataverse.bagit.sourceorg.name +++++++++++++++++++++++++++++++ + +The name for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_NAME``. + +.. _dataverse.bagit.sourceorg.address: + +dataverse.bagit.sourceorg.address ++++++++++++++++++++++++++++++++++ + +The mailing address for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. The example in https://datatracker.ietf.org/doc/html/rfc8493 uses commas as separators: ``1 Main St., Cupertino, California, 11111``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_ADDRESS``. + +.. _dataverse.bagit.sourceorg.email: + +dataverse.bagit.sourceorg.email ++++++++++++++++++++++++++++++++ + +The email for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. + .. _feature-flags: Feature Flags From fa6f850b28e8dea1dd2dff542814e29fd7865153 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 16:07:30 -0500 Subject: [PATCH 186/546] limit to downloading from /tmp, add docs #8760 --- doc/release-notes/8760-download-tmp-file.md | 3 +++ doc/sphinx-guides/source/api/changelog.rst | 7 +++++ doc/sphinx-guides/source/api/native-api.rst | 10 +++++++ .../edu/harvard/iq/dataverse/api/Admin.java | 13 +++++++--- .../edu/harvard/iq/dataverse/api/AdminIT.java | 26 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/BagIT.java | 10 ++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 7 +++++ 7 files changed, 64 insertions(+), 12 deletions(-) create mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md new file mode 100644 index 00000000000..7623a91ac9a --- /dev/null +++ b/doc/release-notes/8760-download-tmp-file.md @@ -0,0 +1,3 @@ +A new API has been added for testing purposes that allows files to be downloaded from /tmp. 
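A rough usage sketch (superuser only; `SERVER_URL` and `API_TOKEN` are placeholders, not values shipped with this change):

    curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt"

Requests for paths outside /tmp are rejected with a 400 error.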
+ +See diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d6742252d27..7d6545999ca 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,6 +5,13 @@ API Changelog :local: :depth: 1 +6.1 +--- + +New +~~~ +- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. + 6.0 ----- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..5b1e7410a4f 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5349,6 +5349,16 @@ A curl example using an ``ID`` Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. +.. _download-file-from-tmp: + +Download File from /tmp +~~~~~~~~~~~~~~~~~~~~~~~ + +As a superuser:: + + GET /api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt + +Note that this API is probably only useful for testing. MyData ------ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 684ed32dff8..4da1962853a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -123,6 +123,7 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import java.nio.file.Paths; /** * Where the secure, setup API calls live. @@ -2428,12 +2429,12 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } /** - * For testing only. Download a file from the file system. + * For testing only. Download a file from /tmp. */ @GET @AuthRequired - @Path("/localfile") - public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + @Path("/downloadTmpFile") + public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) { try { AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); if (!user.isSuperuser()) { @@ -2442,8 +2443,12 @@ public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam(" } catch (WrappedResponse wr) { return wr.getResponse(); } + java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize(); + if (!normalizedPath.toString().startsWith("/tmp")) { + return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'."); + } try { - return ok(new FileInputStream(pathToFile)); + return ok(new FileInputStream(fullyQualifiedPathToFile)); } catch (IOException ex) { return error(Status.BAD_REQUEST, ex.toString()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..91ba67b10ff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -854,6 +854,32 @@ public void testBannerMessages(){ } + /** + * For a successful download from /tmp, see BagIT. Here we are doing error + * checking. 
+ */ + @Test + public void testDownloadTmpFile() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response tryToDownloadAsNonSuperuser = UtilIT.downloadTmpFile("/tmp/foo", apiToken); + tryToDownloadAsNonSuperuser.then().assertThat().statusCode(FORBIDDEN.getStatusCode()); + + Response toggleSuperuser = UtilIT.makeSuperUser(username); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response tryToDownloadEtcPasswd = UtilIT.downloadTmpFile("/etc/passwd", apiToken); + tryToDownloadEtcPasswd.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Path must begin with '/tmp' but after normalization was '/etc/passwd'.")); + } + private String createTestNonSuperuserApiToken() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index fae9cf95156..28f7fa28328 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -115,7 +115,7 @@ public void testBagItExport() throws IOException { //Bag-Size: 0 bytes //Payload-Oxum: 0.0 //Internal-Sender-Identifier: Root:Darwin's Finches - Response downloadBag = downloadLocalFile(pathToZip, apiToken); + Response downloadBag = UtilIT.downloadTmpFile(pathToZip, apiToken); downloadBag.then().assertThat().statusCode(OK.getStatusCode()); Path outputPath = Paths.get("/tmp/foo.zip"); java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); @@ -162,10 +162,4 @@ public static void tearDownClass() { } - static Response downloadLocalFile(String pathToFile, String apiToken) { - return given() - .header("X-Dataverse-key", apiToken) - .get("/api/admin/localfile?pathToFile=" + pathToFile); - } - -} +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..6abfb10c4f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3574,4 +3574,11 @@ static Response getDownloadSize(Integer datasetId, return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } + + static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); + } + } From 06f6222ba785fa37890efa4156ec3e7988fe4ff5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 20:29:47 -0500 Subject: [PATCH 187/546] more intermediate changes to the entity classes #8549 --- .../edu/harvard/iq/dataverse/DvObject.java | 28 +++++++++++++++++++ .../iq/dataverse/DvObjectContainer.java | 8 ++++-- .../dataverse/ingest/IngestServiceBean.java | 7 +++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..b86fabd0a07 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -2,6 +2,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -156,6 +158,9 @@ public String visit(DataFile df) { private boolean identifierRegistered; + @Column(nullable = true) + private Long storageSize; + private transient GlobalId globalId = null; @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) @@ -177,6 +182,13 @@ public void setAlternativePersistentIndentifiers(Set saveAndAddFilesToDataset(DatasetVersion version, + List newFiles, + DataFile fileToReplace, + boolean tabIngest) { + return saveAndAddFilesToDataset(version, newFiles, fileToReplace, tabIngest, null); + } public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace, From 8766932b6c086b1775e3faf8e19f411d83f87c07 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:12 -0500 Subject: [PATCH 188/546] extra logging --- .../iq/dataverse/search/SearchIncludeFragment.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 400f10cc375..c579eb14b7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -343,9 +343,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine("ATTENTION! query from user: " + query); - logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.fine("ATTENTION! sort by: " + sortField); + logger.info("ATTENTION! query from user: " + query); + logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -408,6 +409,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + logger.info("second pass query: " + queryToPassToSolr); + logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From 552e7350cd7f9d9eb577b056e8d3eb414e8dc3cc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:40 -0500 Subject: [PATCH 189/546] get quota command #8549 --- .../impl/GetCollectionQuotaCommand.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java new file mode 100644 index 00000000000..f07fde9508e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.io.IOException; +import java.util.List; +import java.util.logging.Logger; + +/** + * + * @author landreev + * The command doesn't do much. It's sole purpose is to check the permissions + * when it's called by the /api/dataverses/.../storage/quota api. 
+ */ +@RequiredPermissions(Permission.ManageDataversePermissions) +public class GetCollectionQuotaCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (dataverse != null && dataverse.getStorageQuota() != null) { + return dataverse.getStorageQuota().getAllocation(); + } + + return null; + } +} + + From e4aea93f0ada3212d1116b13cd0b2ae8105100e1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:20:29 -0500 Subject: [PATCH 190/546] extra logging --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index c579eb14b7e..e5b5763efe6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused try { logger.info("ATTENTION! query from user: " + query); logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.info("ATTENTION! sort by: " + sortField); /** From 2b8777990d008b31e61c4338f5b5e964e1f4a20d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:21:17 -0500 Subject: [PATCH 191/546] new classes and instances #8549 --- .../iq/dataverse/storageuse/StorageQuota.java | 118 ++++++++++++++++++ .../iq/dataverse/storageuse/StorageUse.java | 94 ++++++++++++++ .../storageuse/StorageUseServiceBean.java | 65 ++++++++++ 3 files changed, 277 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java new file mode 100644 index 00000000000..68ff6d95d00 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -0,0 +1,118 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import java.io.Serializable; +import java.util.logging.Logger; + +//import jakarta.persistence.*; + +/** + * + * @author landreev + * + */ +@Entity +public class StorageQuota implements Serializable { + private static final Logger logger = 
Logger.getLogger(StorageQuota.class.getCanonicalName()); + + /** + * Only Collection quotas are supported, for now + */ + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * For defining quotas for Users and/or Groups + * (Not supported as of yet) + + @Column(nullable = true) + private String assigneeIdentifier; + */ + + /** + * Could be changed to ManyToOne - if we wanted to be able to define separate + * quotas on the same collection for different users. (?) + * Whether we actually want to support the above is TBD. (possibly not) + * Only collection-wide quotas are supported for now. + */ + @OneToOne + @JoinColumn(name="definitionPoint_id", nullable=true) + private DvObject definitionPoint; + + @Column(nullable = true) + private Long allocation; + + public StorageQuota() {} + + /*public String getAssigneeIdentifier() { + return assigneeIdentifier; + } + + public void setAssigneeIdentifier(String assigneeIdentifier) { + this.assigneeIdentifier = assigneeIdentifier; + }*/ + + public DvObject getDefinitionPoint() { + return definitionPoint; + } + + public void setDefinitionPoint(DvObject definitionPoint) { + this.definitionPoint = definitionPoint; + } + + public Long getAllocation() { + return allocation; + } + + public void setAllocation(Long allocation) { + this.allocation = allocation; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageQuota)) { + return false; + } + StorageQuota other = (StorageQuota) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageQuota[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java new file mode 100644 index 00000000000..2633e3e026b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -0,0 +1,94 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GenerationType; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import java.io.Serializable; + +/** + * + * @author landreev + */ +@NamedQueries({ + @NamedQuery(name = "StorageUse.findByteSizeByDvContainerId",query = "SELECT su.sizeInBytes FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.findByDvContainerId",query = "SELECT su FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId ") +}) +@Entity +public class StorageUse 
implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @OneToOne + @JoinColumn(nullable=false) + private DvObject dvObjectContainer; + + @Column + private Long sizeInBytes = null; + + public StorageUse(DvObjectContainer dvObjectContainer, Long sizeInBytes) { + this.dvObjectContainer = dvObjectContainer; + this.sizeInBytes = sizeInBytes; + } + + public Long getSizeInBytes() { + return sizeInBytes; + } + + public void setSizeInBytes(Long sizeInBytes) { + this.sizeInBytes = sizeInBytes; + } + + public void incrementSizeInBytes(Long sizeInBytes) { + this.sizeInBytes += sizeInBytes; + } + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageUse)) { + return false; + } + StorageUse other = (StorageUse) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageUse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java new file mode 100644 index 00000000000..fd04344c234 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -0,0 +1,65 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@Stateless +@Named +public class StorageUseServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(StorageUseServiceBean.class.getCanonicalName()); + @EJB + DataverseServiceBean dataverseService; + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public StorageUse findByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByDvContainerId", StorageUse.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public Long findStorageSizeByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByteSizeByDvContainerId", Long.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { + incrementStorageSize(dvObject, filesize); + DvObjectContainer parent = dvObject.getOwner(); + while (parent != null) { + incrementStorageSize(parent, filesize); + parent = parent.getOwner(); + } + } + + /** + * Should this be done in a new transaction? 
+ * @param dvObject + * @param filesize + */ + public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { + StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); + if (dvContainerSU != null) { + // @todo: named query + dvContainerSU.incrementSizeInBytes(filesize); + em.merge(dvContainerSU); + } else { + dvContainerSU = new StorageUse(dvObject, filesize); + em.persist(dvContainerSU); + } + } + +} From 235b1b018a50fd099c983516b046c6847be41e48 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:44:47 -0500 Subject: [PATCH 192/546] A fix for the missing subtree filter query in the 2nd pass search query. #9635 --- .../search/SearchIncludeFragment.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index e5b5763efe6..1acd4b0f8a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -282,7 +282,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; SolrQueryResponse solrQueryResponseSecondPass = null; - List filterQueriesFinal = new ArrayList<>(); + List filterQueriesExtended = new ArrayList<>(); if (dataverseAlias != null) { this.dataverse = dataverseService.findByAlias(dataverseAlias); @@ -296,7 +296,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * @todo centralize this into SearchServiceBean */ if (!isfilterQueryAlreadyInMap(filterDownToSubtree)){ - filterQueriesFinal.add(filterDownToSubtree); + filterQueriesExtended.add(filterDownToSubtree); } // this.dataverseSubtreeContext = dataversePath; } else { @@ -309,7 +309,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } - filterQueriesFinal.addAll(filterQueries); + filterQueriesExtended.addAll(filterQueries); /** * Add type queries, for the types (Dataverses, Datasets, Datafiles) @@ -323,7 +323,9 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused selectedTypesHumanReadable = combine(arr, " OR "); if (!selectedTypesHumanReadable.isEmpty()) { typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + } + List filterQueriesFinal = new ArrayList<>(); + filterQueriesFinal.addAll(filterQueriesExtended); filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -343,10 +345,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.info("ATTENTION! query from user: " + query); - logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); - logger.info("ATTENTION! sort by: " + sortField); + logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); + logger.fine("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -399,7 +401,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run a second search to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueries); + filterQueriesFinalSecondPass.addAll(filterQueriesExtended); arr = new String[3 - selectedTypesList.size()]; int c = 0; @@ -409,8 +411,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - logger.info("second pass query: " + queryToPassToSolr); - logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); + logger.fine("second pass query: " + queryToPassToSolr); + logger.fine("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From ceeeaecb9d222c2d2073713cdd839dac2ab4a304 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:47:30 -0500 Subject: [PATCH 193/546] typo. #9635 --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1acd4b0f8a1..dd9cd78982a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! query from user: " + query); logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.fine("ATTENTION! sort by: " + sortField); From 5ecfd49c7397f04003c745fc78074e1fb1a9b0aa Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:30:16 -0500 Subject: [PATCH 194/546] #9686 update metrics queries --- .../dataverse/metrics/MetricsServiceBean.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..6b540595e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? 
"and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,13 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + + System.out.print("datasets to month..."); + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +191,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -198,7 +200,6 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); - return (long) query.getSingleResult(); } @@ -212,6 +213,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id \n" + " where versionstate='RELEASED'\n" + " and dataset.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + @@ -225,6 +227,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + + " dvobject.id = dataset.id \n" + " AND dataset.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -253,7 +256,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "ORDER BY count(dataset.id) desc;" ); logger.log(Level.FINE, "Metric query: {0}", query); - + System.out.print("by sub to month: " + query); return query.getResultList(); } @@ -616,7 +619,7 @@ public String returnUnexpiredCacheDayBased(String metricName, String days, Strin public String returnUnexpiredCacheMonthly(String metricName, String yyyymm, String dataLocation, Dataverse d) { Metric queriedMetric = getMetric(metricName, dataLocation, yyyymm, d); - + System.out.print("returnUnexpiredCacheMonthly: " + queriedMetric); if (!doWeQueryAgainMonthly(queriedMetric)) { return queriedMetric.getValueJson(); } From f69c22982aeae57fdfb57607e06dfad628123b45 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:33:06 -0500 Subject: [PATCH 195/546] #9686 update metrics IT --- src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..fa05a23b675 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = 
"2023-11"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); From e4ede35ea8a57afc8830dc63619bed3b660da8ff Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:37:27 -0500 Subject: [PATCH 196/546] #9464 fix logger reference --- .../engine/command/impl/ValidateDatasetJsonCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java index ae1a89c3661..619740ddd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -21,7 +21,7 @@ @RequiredPermissions(Permission.AddDataset) public class ValidateDatasetJsonCommand extends AbstractCommand { - private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName()); private final Dataverse dataverse; private final String datasetJson; From d30ecfda14bd4adcafced8486d58507aba12c55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 25 Oct 2023 10:56:14 -0400 Subject: [PATCH 197/546] add S3 tests, LocalStack, MinIO #6783 Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. Note that the format of docker-compose-dev.yml had to change to allow for JVM options to be added. Finally, docs were improved for listing and setting stores via API. --- conf/localstack/buckets.sh | 3 + doc/release-notes/6783-s3-tests.md | 3 + .../source/admin/dataverses-datasets.rst | 4 + docker-compose-dev.yml | 78 +++++- pom.xml | 5 + .../harvard/iq/dataverse/api/S3AccessIT.java | 228 +++++++++++++++--- .../dataaccess/S3AccessIOLocalstackIT.java | 153 ++++++++++++ 7 files changed, 436 insertions(+), 38 deletions(-) create mode 100755 conf/localstack/buckets.sh create mode 100644 doc/release-notes/6783-s3-tests.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh new file mode 100755 index 00000000000..fe940d9890d --- /dev/null +++ b/conf/localstack/buckets.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack +awslocal s3 mb s3://mybucket diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md new file mode 100644 index 00000000000..1febb87aaed --- /dev/null +++ b/doc/release-notes/6783-s3-tests.md @@ -0,0 +1,3 @@ +Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. + +In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. 
It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 170807d3d67..37494c57fa1 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -53,11 +53,15 @@ Configure a Dataverse Collection to Store All New Files in a Specific File Store To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + +(Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.) The current driver can be seen using:: curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver +(Note that for ``dataverse.files.store1.label=MyLabel``, ``store1`` will be returned.) + and can be reset to the default store with:: curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index bb0a4c95b12..769c24fb3a5 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -9,16 +9,38 @@ services: restart: on-failure user: payara environment: - - DATAVERSE_DB_HOST=postgres - - DATAVERSE_DB_PASSWORD=secret - - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} - - ENABLE_JDWP=1 - - DATAVERSE_FEATURE_API_BEARER_AUTH=1 - - DATAVERSE_AUTH_OIDC_ENABLED=1 - - DATAVERSE_AUTH_OIDC_CLIENT_ID=test - - DATAVERSE_AUTH_OIDC_CLIENT_SECRET=94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 - - DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL=http://keycloak.mydomain.com:8090/realms/test - - DATAVERSE_JSF_REFRESH_PERIOD=1 + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: ${DATAVERSE_DB_USER} + ENABLE_JDWP: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_JSF_REFRESH_PERIOD: "1" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.files.localstack1.type=s3 + -Ddataverse.files.localstack1.label=LocalStack + -Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack1.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack1.bucket-name=mybucket + -Ddataverse.files.localstack1.path-style-access=true + -Ddataverse.files.localstack1.upload-redirect=false + -Ddataverse.files.localstack1.access-key=default + -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.minio1.type=s3 + -Ddataverse.files.minio1.label=MinIO + -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 + -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 + -Ddataverse.files.minio1.bucket-name=mybucket + -Ddataverse.files.minio1.path-style-access=true + -Ddataverse.files.minio1.upload-redirect=false + 
-Ddataverse.files.minio1.access-key=minioadmin + -Ddataverse.files.minio1.secret-key=minioadmin ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -156,6 +178,42 @@ services: networks: - dataverse + dev_localstack: + container_name: "dev_localstack" + hostname: "localstack" + image: localstack/localstack:2.3.2 + restart: on-failure + ports: + - "127.0.0.1:4566:4566" + environment: + - DEBUG=${DEBUG-} + - DOCKER_HOST=unix:///var/run/docker.sock + - HOSTNAME_EXTERNAL=localstack + networks: + - dataverse + volumes: + - ./conf/localstack:/etc/localstack/init/ready.d + tmpfs: + - /localstack:mode=770,size=128M,uid=1000,gid=1000 + + dev_minio: + container_name: "dev_minio" + hostname: "minio" + image: minio/minio + restart: on-failure + ports: + - "9000:9000" + - "9001:9001" + networks: + - dataverse + volumes: + - minio_storage:/data + environment: + # these are the defaults but are here for clarity + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + command: server /data + networks: dataverse: driver: bridge diff --git a/pom.xml b/pom.xml index 4d10073334f..34b0ad2e835 100644 --- a/pom.xml +++ b/pom.xml @@ -612,6 +612,11 @@ 3.0.0 test + + org.testcontainers + localstack + test + From 4ad06ba1af38cf84f5b639a605eecaf95a4fe8b1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:54:20 -0500 Subject: [PATCH 280/546] rename previewshavefailed to previewimagefail #9506 This matches previewimageavailable, also in dvobject. Plus it's clear we aren't talking about shaving. :) --- .../edu/harvard/iq/dataverse/DataFileServiceBean.java | 2 +- .../iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 10 +++++----- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- .../migration/V6.0.0.5__9506-track-thumb-failures.sql | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index fae95f12a0c..446c66e5a8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -987,7 +987,7 @@ public boolean isThumbnailAvailable (DataFile file) { this.save(file); return true; } - file.setPreviewsHaveFailed(true); + file.setPreviewImageFail(true); this.save(file); return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index d209f7d9e26..1ee517c9831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -825,7 +825,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -859,7 +859,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 
'application/pdf' " diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 12f0b63b3a1..c6d4a73bfd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -191,14 +191,14 @@ public void setPreviewImageAvailable(boolean status) { * real failure in generating the thumbnail. In both cases, we won't want to try * again every time the preview/thumbnail is requested for a view. */ - private boolean previewsHaveFailed; + private boolean previewImageFail; - public boolean isPreviewsHaveFailed() { - return previewsHaveFailed; + public boolean isPreviewImageFail() { + return previewImageFail; } - public void setPreviewsHaveFailed(boolean previewsHaveFailed) { - this.previewsHaveFailed = previewsHaveFailed; + public void setPreviewImageFail(boolean previewImageFail) { + this.previewImageFail = previewImageFail; } public Timestamp getModificationTime() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 2c2f49a0444..b1d31f8d44b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2429,7 +2429,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @DELETE @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { - em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); return ok("Thumnail Failure Flags cleared."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index febf659b71a..2de37174a3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -119,9 +119,9 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewImageFail() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: - if (!file.isPreviewsHaveFailed()) { + if (!file.isPreviewImageFail()) { boolean thumbnailGenerated = false; if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { thumbnailGenerated = generateImageThumbnail(storageIO, size); diff --git a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql index 9b12d27db91..156960d2011 100644 --- a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql +++ b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql @@ -1 +1 @@ -ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewimagefail BOOLEAN DEFAULT FALSE; From 7148158dec36576c33c1cbc96143128769dd938a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:56:43 -0500 Subject: [PATCH 281/546] add tests #9506 --- .../java/edu/harvard/iq/dataverse/api/AdminIT.java | 10 ++++++++++ .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..c29c8619d8c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -818,6 +818,16 @@ public void testLoadMetadataBlock_ErrorHandling() { message ); } + @Test + public void testClearThumbnailFailureFlag(){ + Response nonExistentFile = UtilIT.clearThumbnailFailureFlag(Long.MAX_VALUE); + nonExistentFile.prettyPrint(); + nonExistentFile.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + Response clearAllFlags = UtilIT.clearThumbnailFailureFlags(); + clearAllFlags.prettyPrint(); + clearAllFlags.then().assertThat().statusCode(OK.getStatusCode()); + } @Test public void testBannerMessages(){ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..58edbae18e0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -223,7 +223,19 @@ public static Response validateDataFileHashValue(String fileId, String apiToken .post("/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken); return response; } - + + public static Response clearThumbnailFailureFlags() { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag"); + return response; + } + + public static Response clearThumbnailFailureFlag(long fileId) { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag/" + fileId); + return response; + } + private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) { JsonObjectBuilder builder = Json.createObjectBuilder(); builder.add("authenticationProviderId", authenticationProviderId); From 67502ca2326b0536077ad96eb0fe497ca70f37f6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:58:18 -0500 Subject: [PATCH 282/546] fix typos #9506 --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index b1d31f8d44b..1445db81e4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2430,7 +2430,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); - return ok("Thumnail Failure Flags cleared."); + return ok("Thumbnail Failure Flags cleared."); } @DELETE @@ -2441,7 +2441,7 @@ public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String file Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); - return ok("Thumnail Failure Flag cleared for file id=: " + df.getId() + "."); + return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); } catch (WrappedResponse r) { logger.info("Could not find file with the id: " + fileId); return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); From 82f0bc0eef833388b3e20bf48fe8bb46163640ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:59:05 -0500 Subject: [PATCH 283/546] one more rename to previewimagefail #9506 This should have been part of 4ad06ba1a. --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 1445db81e4c..4cb0521d218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2438,7 +2438,7 @@ public Response clearThumbnailFailureFlag() { public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { try { DataFile df = findDataFileOrDie(fileId); - Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); From de2f9a4f6beaad2e34249616dd39748c29e15701 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:37:35 -0500 Subject: [PATCH 284/546] popup separate tab for single file download transfer --- .../iq/dataverse/FileDownloadServiceBean.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 7a03f1a35dc..ca3f5b4bded 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -20,6 +20,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -310,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri } } - private void 
redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { - String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - logger.fine("Redirecting to file download url: " + fileDownloadUrl); - try { - FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); - } catch (IOException ex) { - logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, + Long fileMetadataId) { + String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, + fileMetadataId); + if (downloadType.equals("GlobusTransfer")) { + PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); + } else { + logger.fine("Redirecting to file download url: " + fileDownloadUrl); + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + } } } From c82064ace53bcbf5e8b04a24f916fa333f863c9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:38:17 -0500 Subject: [PATCH 285/546] fix old label in popup required case --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 8ef2af40431..318aab1454e 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -80,7 +80,7 @@ - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} From 2644faee02f7001e51d19e474e3ca5b1b1264302 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 18:03:29 -0500 Subject: [PATCH 286/546] Rearranges the code that updates the Storage Use records to reflect the size of the saved content. #8549 --- .../dataverse/ingest/IngestServiceBean.java | 120 +++++++++++------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9b3ddd228e9..5efb4c06f48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -177,7 +177,7 @@ public class IngestServiceBean { // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. - // There is way too much going on in this method. :( + // !! There is way too much going on in this method. :( !! // @todo: Is this method a good candidate for turning into a dedicated Command? public List saveAndAddFilesToDataset(DatasetVersion version, @@ -195,6 +195,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // renamed FOOBAR-1.txt back to FOOBAR.txt... 
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles, fileToReplace); Dataset dataset = version.getDataset(); + long totalBytesSaved = 0L; if (systemConfig.isStorageQuotasEnforced()) { // Check if this dataset is subject to any storage quotas: @@ -205,6 +206,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { + // is it ever "unattached"? + // do we ever call this method with dataFile.getOwner() != null? + // - we really shouldn't be, either. unattached = true; dataFile.setOwner(dataset); } @@ -230,31 +234,38 @@ public List saveAndAddFilesToDataset(DatasetVersion version, dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: + /** + * This commented-out code demonstrates how to copy + * bytes from a local InputStream (or a readChannel) + * into the writable byte channel of a Dataverse + * DataAccessIO object: */ - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } + /** + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); + * readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long + * start = 0; + * while ( start < readChannel.size() ) { + * readChannel.transferTo(start, bytesPerIteration, writeChannel); start += bytesPerIteration; + * } */ - /* - * But it's easier to use this convenience method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); + /** + * But it's easier to use this convenience method from + * the DataAccessIO: + * + * (if the underlying storage method for this file is + * local filesystem, the DataAccessIO will simply copy + * the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, + * storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); */ dataAccess.savePath(tempLocationPath); @@ -265,7 +276,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - // TODO: reformat this file to remove the many tabs added in cc08330 + // TODO: reformat this file to remove the many tabs added in cc08330 - done, I think? extractMetadataNcml(dataFile, tempLocationPath); } catch (IOException ioex) { @@ -375,6 +386,15 @@ public List saveAndAddFilesToDataset(DatasetVersion version, if (savedSuccess) { if (uploadSessionQuota != null) { + // It may be worth considering refreshing the quota here, + // and incrementing the Storage Use record for + // all the parent objects in real time, as + // *each* individual file is being saved. 
I experimented + // with that, but decided against it for performance + // reasons. But yes, there may be some edge case where + // parallel multi-file uploads can end up being able + // to save 2X worth the quota that was available at the + // beginning of each session. if (confirmedFileSize > uploadSessionQuota.getRemainingQuotaInBytes()) { savedSuccess = false; logger.warning("file size over quota limit, skipping"); @@ -382,7 +402,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // this (potentially partial) failure to the user. //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(confirmedFileSize), bytesToHumanReadable(storageQuotaLimit))); } else { - // Adjust quota: logger.info("Setting total usage in bytes to " + (uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize)); uploadSessionQuota.setTotalUsageInBytes(uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize); @@ -390,19 +409,12 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } // ... unless we had to reject the file just now because of - // the quota limits, increment the storage use record(s): + // the quota limits, count the number of bytes saved for the + // purposes of incrementing the total storage of the parent + // DvObjectContainers: if (savedSuccess) { - // Update storage use for all the parent dvobjects: - // @todo: Do we want to do this after after *each* file is saved? - there may be - // quite a few files being saved here all at once. We could alternatively - // perform this update only once, after this loop is completed (are there any - // risks/accuracy loss?) - // This update is performed with a direct native query that - // is supposed to be quite fast. But still. - logger.info("Incrementing recorded storage use by " + confirmedFileSize + " bytes for dataset " + dataset.getId()); - // (@todo: need to consider what happens when this code is called on Create?) - storageUseService.incrementStorageSizeRecursively(dataset.getId(), confirmedFileSize); + totalBytesSaved += confirmedFileSize; } } @@ -425,12 +437,14 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean metadataExtracted = false; boolean metadataExtractedFromNetcdf = false; if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. + /** + * Note that we don't try to ingest the file right away + * - instead we mark it as "scheduled for ingest", then + * at the end of the save process it will be queued for + * async. ingest in the background. In the meantime, the + * file will be ingested as a regular, non-tabular file, + * and appear as such to the user, until the ingest job + * is finished with the Ingest Service. */ dataFile.SetIngestScheduled(); } else if (fileMetadataExtractable(dataFile)) { @@ -488,6 +502,10 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // dataset.getGlobalId()); // Make sure the file is attached to the dataset and to the version, if this // hasn't been done yet: + // @todo: but shouldn't we be doing the reverse if we haven't been + // able to save the file? 
- disconnect it from the dataset and + // the version?? - L.A. 2023 + // (that said, is there *ever* a case where dataFile.getOwner() != null ?) if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); @@ -503,8 +521,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, DataFileCategory dataFileCategory = dfcIt.next(); if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); + DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); if (newCategory != null) { newCategory.addFileMetadata(dataFile.getFileMetadata()); // dataFileCategory = newCategory; @@ -516,10 +533,25 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } } } + + // Hmm. Noticing that the following two things - adding the + // files to the return list were being + // done outside of this "if (savedSuccess)" block. I'm pretty + // sure that was wrong. - L.A. 11-30-2023 + ret.add(dataFile); + // (unless that is that return value isn't used for anything - ?) } - ret.add(dataFile); } + // Update storage use for all the parent dvobjects: + logger.info("Incrementing recorded storage use by " + totalBytesSaved + " bytes for dataset " + dataset.getId()); + // Q. Need to consider what happens when this code is called on Create? + // A. It works on create as well, yes. (the recursive increment + // query in the method below does need the parent dataset to + // have the database id. But even if these files have been + // uploaded on the Create form, we first save the dataset, and + // then add the files to it. - L.A. + storageUseService.incrementStorageSizeRecursively(dataset.getId(), totalBytesSaved); } return ret; From dc567848bdfcc9647d0779c01bb57f93ab593d89 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:10:43 -0500 Subject: [PATCH 287/546] making the set/delete quota commands superuser-only (doh). 
#8549 --- .../impl/DeleteCollectionQuotaCommand.java | 13 ++++++++++++- .../command/impl/SetCollectionQuotaCommand.java | 16 +++++++++++++--- src/main/java/propertyFiles/Bundle.properties | 1 + .../edu/harvard/iq/dataverse/api/FilesIT.java | 3 +++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 5fcbad929a9..bdeb9c6e8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -6,20 +6,25 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class DeleteCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName()); @@ -33,6 +38,12 @@ public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // first check if user is a superuser + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, targetDataverse); + } + if (targetDataverse == null) { throw new IllegalCommandException("", this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index a134cbefdb9..6b0d1bf313a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -13,14 +14,18 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import 
edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class SetCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); @@ -36,13 +41,18 @@ public SetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target, Lo @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // Check if user is a superuser: + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, dataverse); + } if (dataverse == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid collection", this); } if (allocation == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid allocation in bytes", this); } StorageQuota storageQuota = dataverse.getStorageQuota(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 748b674a4e1..5033426175c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -925,6 +925,7 @@ dataverse.storage.quota.allocation=Total quota allocation for this collection: { dataverse.storage.quota.notdefined=No quota defined for this collection dataverse.storage.quota.updated=Storage quota successfully set for the collection dataverse.storage.quota.deleted=Storage quota successfully disabled for the collection +dataverse.storage.quota.superusersonly=Only superusers can change storage quotas. dataverse.storage.use=Total recorded size of the files stored in this collection (user-uploaded files plus the versions in the archival tab-delimited format when applicable): {0} bytes dataverse.datasize.ioerror=Fatal IO error while trying to determine the total size of the files stored in the dataverse. Please report this error to the Dataverse administrator. 
dataverse.inherited=(inherited from enclosing Dataverse) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index e391e17d8d5..915f82a6de2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -2375,6 +2375,9 @@ public void testCollectionStorageQuotas() { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); From f4eee659021dfaab4dfa9c13e761b7c1875281c5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:18:15 -0500 Subject: [PATCH 288/546] removing the license template stubs (#8549) --- .../engine/command/impl/DeleteCollectionQuotaCommand.java | 5 ----- .../engine/command/impl/GetCollectionStorageUseCommand.java | 4 ---- .../engine/command/impl/SetCollectionQuotaCommand.java | 6 ------ .../edu/harvard/iq/dataverse/storageuse/StorageQuota.java | 4 ---- .../edu/harvard/iq/dataverse/storageuse/StorageUse.java | 4 ---- .../iq/dataverse/storageuse/StorageUseServiceBean.java | 4 ---- .../iq/dataverse/storageuse/UploadSessionQuotaLimit.java | 4 ---- 7 files changed, 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index bdeb9c6e8cb..4015228366b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -1,11 +1,6 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java index 40b3128b80d..c30a5a34a81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index 6b0d1bf313a..cf8fb6fd42e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -1,13 +1,7 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java index 0cfebe4167a..d00f7041e61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 11a2a8b706c..240fba1037d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index e92ba43e950..b542a7cd661 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java index 06bbe986f70..f7dac52e886 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click 
nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; /** From 538921061604e4daacd864f8ec3865d6d0642561 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:21:35 +0000 Subject: [PATCH 289/546] Stash: working on new canDownloadAtLeastOneFile Datasets API endpoint --- .../iq/dataverse/PermissionServiceBean.java | 8 ++++++ .../harvard/iq/dataverse/api/Datasets.java | 14 +++++++++++ .../harvard/iq/dataverse/api/DatasetsIT.java | 25 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 +++++ 4 files changed, 53 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index a1de33a764e..9e6628617ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -837,4 +837,12 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } + public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + return true; + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..a9cfefc33d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4134,4 +4134,18 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft)); return ok(jsonObjectBuilder); } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile") + public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + }, getRequestUser(crc)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d20f1e8a58b..945b741a94b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4121,4 +4121,29 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testGetCanDownloadAtLeastOneFile() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String 
dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + // Call with valid dataset id + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + + // Call with invalid dataset id + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..bf43733788a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3442,6 +3442,12 @@ static Response getUserPermissionsOnDataset(String datasetId, String apiToken) { .get("/api/datasets/" + datasetId + "/userPermissions"); } + static Response getCanDownloadAtLeastOneFile(String datasetId, String versionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/datasets/" + datasetId + "/versions/" + versionId + "/canDownloadAtLeastOneFile"); + } + static Response createFileEmbargo(Integer datasetId, Integer fileId, String dateAvailable, String apiToken) { JsonObjectBuilder jsonBuilder = Json.createObjectBuilder(); jsonBuilder.add("dateAvailable", dateAvailable); From 8ec0984a663e4daa5b60049c1ee8d51004ca452c Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 1 Dec 2023 09:26:39 -0500 Subject: [PATCH 290/546] add page on Jenkins #10101 --- doc/sphinx-guides/source/qa/index.md | 1 + doc/sphinx-guides/source/qa/jenkins.md | 44 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 doc/sphinx-guides/source/qa/jenkins.md diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 08deb7ee27d..6027f07574f 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -7,4 +7,5 @@ performance-tests.md manual-testing.md test-automation.md other-approaches.md +jenkins.md ``` diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md new file mode 100644 index 00000000000..dbfec0d60d0 --- /dev/null +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -0,0 +1,44 @@ +# Jenkins + +```{contents} Contents: +:local: +:depth: 3 +``` + +## Introduction + +Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) + +You can find our Jenkins installation at . + +Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) 
+ +## Jobs + +Jenkins is organized into jobs. We'll highlight a few. + +### IQSS-dataverse-develop + +, which we will refer to as the "develop" job, runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! + +You can get to this job from the README at . + +### IQSS-Dataverse-Develop-PR + + can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and its particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). + +### guides.dataverse.org + + is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. + +## Checking if API Tests are Passing + +If API tests are failing, you should not merge the pull request. + +How can you know if API tests are passing? Here are the steps, by way of example. + +- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). +- You are now on the new "blue" interface for Jenkins. Click the button in the header called "go to classic" which should take you to (for example) . +- Click "Test Result". +- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. +- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error.
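If you want a quick, scriptable check on whether the develop job is green (rather than clicking through the UI), Jenkins exposes a read-only JSON API on every job page. The sketch below is illustrative only: it assumes your IP address is allowed through the firewall mentioned in the Introduction, and `JENKINS_URL` is a placeholder for the address of our Jenkins installation.

```bash
# Minimal sketch: ask Jenkins for the result of the last completed "develop" build.
# JENKINS_URL is a placeholder; some instances also require -u user:apitoken.
JENKINS_URL=https://jenkins.example.edu
curl -s "$JENKINS_URL/job/IQSS-dataverse-develop/lastCompletedBuild/api/json?tree=result"
# Expected output is a small JSON document such as {"result":"SUCCESS"};
# "FAILURE" or "UNSTABLE" means the job needs attention - make noise about it!
```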
From f48f3a84a72b212d66a4bae1c1056e31dc8f7e52 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:50:40 +0000 Subject: [PATCH 291/546] Fixed: DatasetVersionFilesServiceBean order by condition for type criteria --- .../DatasetVersionFilesServiceBean.java | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 78fd896c897..99c3c65e3b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -260,22 +260,27 @@ private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion, return criteriaBuilder.and(predicates.toArray(new Predicate[]{})); } - private Order createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, - FileOrderCriteria orderCriteria, - Root fileMetadataRoot) { + private List createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, + FileOrderCriteria orderCriteria, + Root fileMetadataRoot) { Path label = fileMetadataRoot.get("label"); Path dataFile = fileMetadataRoot.get("dataFile"); Path publicationDate = dataFile.get("publicationDate"); Path createDate = dataFile.get("createDate"); Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate); - return switch (orderCriteria) { - case NameZA -> criteriaBuilder.desc(label); - case Newest -> criteriaBuilder.desc(orderByLifetimeExpression); - case Oldest -> criteriaBuilder.asc(orderByLifetimeExpression); - case Size -> criteriaBuilder.asc(dataFile.get("filesize")); - case Type -> criteriaBuilder.asc(dataFile.get("contentType")); - default -> criteriaBuilder.asc(label); - }; + List orderList = new ArrayList<>(); + switch (orderCriteria) { + case NameZA -> orderList.add(criteriaBuilder.desc(label)); + case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression)); + case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression)); + case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize"))); + case Type -> { + orderList.add(criteriaBuilder.asc(dataFile.get("contentType"))); + orderList.add(criteriaBuilder.asc(label)); + } + default -> orderList.add(criteriaBuilder.asc(label)); + } + return orderList; } private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { From a142ac82e7315370755f11245c38f388f7580b12 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 1 Dec 2023 12:51:55 -0500 Subject: [PATCH 292/546] Adds description about the "go to classic" button --- doc/sphinx-guides/source/qa/jenkins.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md index dbfec0d60d0..a4ca4d8688f 100644 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -38,7 +38,7 @@ If API tests are failing, you should not merge the pull request. How can you know if API tests are passing? Here are the steps, by way of example. - From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -- You are now on the new "blue" interface for Jenkins. 
Click the button in the header called "go to classic" which should take you to (for example) . +- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . - Click "Test Result". - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. From a29942bf4c8c78d7dee34d61fbb73f44b8ec699e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:08:26 -0500 Subject: [PATCH 293/546] add files not accessible by dataverse flag --- .../dataaccess/AbstractRemoteOverlayAccessIO.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 9de6bf69832..16defc26a4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -48,6 +48,11 @@ public abstract class AbstractRemoteOverlayAccessIO extends static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; protected static final String REMOTE_STORE_NAME = "remote-store-name"; protected static final String REMOTE_STORE_URL = "remote-store-url"; + + // Whether Dataverse can access the file bytes + //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; + protected StorageIO baseStore = null; protected String path = null; protected PoolingHttpClientConnectionManager cm = null; @@ -329,6 +334,10 @@ protected String getStoragePath() throws IOException { logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } + + public static boolean isNotDataverseAccessible(String storeId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(storeId, FILES_NOT_ACCESSIBLE_BY_DATAVERSE)); + } From 0d758398b64521e65c0d0d90d963aeb7b01af42d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:09:03 -0500 Subject: [PATCH 294/546] add Globus store to the normal file upload (as for the remote store) --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 4a4d3f57f83..a1bcbe49327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -263,7 +263,8 @@ public static StorageIO createNewStorageIO(T dvObject, S storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; case REMOTE: - storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + case GLOBUS: + storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; break; default: logger.warning("Could not find storage driver for: " + storageTag); From 
ce8bb6e97ff776777b642ceafb3c1fb7bae6129f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:10:28 -0500 Subject: [PATCH 295/546] add Globus as a download option in file table header requires changes to startGlobusTransfer in separate commit --- src/main/webapp/dataset.xhtml | 2 +- src/main/webapp/filesFragment.xhtml | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 2f76197e508..0b8983a7770 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -230,7 +230,7 @@
  • - +
  • diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index fbc48a0e884..3d28e3170f7 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -436,7 +436,7 @@
    + and !(DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}"> #{bundle.download}
    -
    + and (DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}">
    From 8e75a3e2f501b3f0e09fbc9cba9041c52f769737 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:11:56 -0500 Subject: [PATCH 296/546] Add logic for Globus transfer of some files --- .../edu/harvard/iq/dataverse/DatasetPage.java | 112 +++++++++++++----- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index a663b8588ad..0b0d0a2e4f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -11,6 +11,9 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -361,6 +364,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * other boolean. */ private boolean versionHasTabular = false; + private boolean versionHasGlobus = false; private boolean showIngestSuccess; @@ -2183,10 +2187,19 @@ private String init(boolean initFull) { // the total "originals" size of the dataset with direct custom queries; // then we'll be able to drop the lookup hint for DataTable from the // findDeep() method for the version and further speed up the lookup - // a little bit. + // a little bit. + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (fmd.getDataFile().isTabularData()) { + DataFile df = fmd.getDataFile(); + if (df.isTabularData()) { versionHasTabular = true; + } + if(globusDownloadEnabled) { + if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + versionHasGlobus= true; + } + } + if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) { break; } } @@ -2483,6 +2496,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p public boolean isVersionHasTabular() { return versionHasTabular; } + + public boolean isVersionHasGlobus() { + return versionHasGlobus; + } public boolean isReadOnly() { return readOnly; @@ -3089,6 +3106,16 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } + private List selectedGlobusTransferableFiles; + + public List getSelectedGlobusTransferableFiles() { + return selectedGlobusTransferableFiles; + } + + public void setSelectedGlobusTransferableFiles(List selectedGlobusTransferableFiles) { + this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3247,8 +3274,8 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } } - //if there are two or more files with a total size - //over the zip limit post a "too large" popup + //if there are two or more files, with a total size + //over the zip limit, post a "too large" popup if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { 
setValidateFilesOutcome("FailSize"); return false; @@ -3257,16 +3284,17 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ // If some of the files were restricted and we had to drop them off the // list, and NONE of the files are left on the downloadable list // - we show them a "you're out of luck" popup: - if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("FailRestricted"); return false; } - if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("Mixed"); return true; } - + //ToDo - should Mixed not trigger this? if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) { setValidateFilesOutcome("GuestbookRequired"); } @@ -3302,12 +3330,25 @@ private boolean filterSelectedFiles(){ setSelectedNonDownloadableFiles(new ArrayList<>()); setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); + setSelectedGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ - if(this.fileDownloadHelper.canDownloadFile(fmd)){ + boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); + + boolean globusTransferable = false; + if(globusDownloadEnabled) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; + } else if(globusTransferable) { + getSelectedGlobusTransferableFiles().add(fmd); + someFiles=true; } else { getSelectedNonDownloadableFiles().add(fmd); } @@ -5247,7 +5288,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } - +/* These appear to be unused - toDo - delete private Boolean downloadButtonAllEnabled = null; public boolean isDownloadAllButtonEnabled() { @@ -5276,7 +5317,7 @@ public boolean isDownloadSelectedButtonEnabled(){ } return false; } - +*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; @@ -6277,28 +6318,37 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer() { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if(fileMetadataForAction!=null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - 
PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if(getSelectedDownloadableFiles()!=null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, getSelectedDownloadableFiles())); + public void startGlobusTransfer(boolean transferAll) { + if(transferAll) { + this.setSelectedFiles(workingVersion.getFileMetadatas()); + } + boolean validated = validateFilesForDownload(true); + if (validated) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - //ToDo: For non-public, need the subset that are downloadable by the user - //ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } else { + // ToDo: For non-public, need the subset that are downloadable by the user + // ToDo: For mixed (some in backing store), need the ones in the globus store + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + workingVersion.getFileMetadatas())); + } } } } From 0e91e6ae59020991513add7e14e09c69641ee71e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:12:20 -0500 Subject: [PATCH 297/546] Convenience method to get store id for a file --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index df0c3e5a019..776d04e98cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1797,5 +1797,11 @@ public static boolean isActivelyEmbargoed(List fmdList) { } return false; } + + + public static String getStorageDriver(DataFile dataFile) { + String storageIdentifier = dataFile.getStorageIdentifier(); + return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR)); + } } From e5bf3001e39bf8362f9025e85cf3f6626baf15d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:14:41 -0500 Subject: [PATCH 298/546] skip inaccessible files when doing validatation --- .../command/impl/FinalizeDatasetPublicationCommand.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 3da087addd9..89cfc732455 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -32,15 +32,13 @@ import java.util.logging.Logger; import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; import java.util.ArrayList; import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; - /** * @@ -350,7 +348,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm // (the decision was made to validate all the files on every // major release; we can revisit the decision if there's any // indication that this makes publishing take significantly longer. - if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { + String driverId = FileUtil.getStorageDriver(dataFile); + if(StorageIO.isDataverseAccessible(driverId) && maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { FileUtil.validateDataFileChecksum(dataFile); } else { From 534c99bb0376aeaa25f2d9d54cbe68a8bfb3b6bc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:15:23 -0500 Subject: [PATCH 299/546] Convenience method re: store supports globus access --- .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index aad1dab5eab..d827e40e807 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -58,4 +58,11 @@ public static String getGlobusToken(String storeId) { return StorageIO.getConfigParamForDriver(storeId, GLOBUS_TOKEN); } + public static boolean isGlobusAccessible(String storeId) { + if(StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) { + return true; + } + return false; + } + } From ca1a4f1267b2d52cd38054cca61fbddf6941522b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:16:12 -0500 Subject: [PATCH 300/546] Update to use new isNotDataverseAccessible method in getInputStream --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 7ec1e2f9e73..3e72fa85d35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -232,7 +232,9 @@ public long retrieveSizeFromMedia() { @Override public InputStream getInputStream() throws IOException { - if(Boolean.parseBoolean(getConfigParam("endpoint-maps-to-base-store"))) { + //Currently only supported when using an S3 store with the Globus S3Connector. 
+ //ToDo: Support when using a managed Globus endpoint that supports http access + if(!AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(endpoint)) { return baseStore.getInputStream(); } else { throw new IOException("Not implemented"); From f39fa0715e81aafefd14c92c50171eb436a45491 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:17:03 -0500 Subject: [PATCH 301/546] Convenience method isDataverseAccessible --- .../edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 14fc9254c59..51cdecf64a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -57,6 +57,8 @@ public abstract class StorageIO { static final String UPLOAD_REDIRECT = "upload-redirect"; static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band"; protected static final String DOWNLOAD_REDIRECT = "download-redirect"; + protected static final String DATAVERSE_INACCESSIBLE = "dataverse-inaccessible"; + public StorageIO() { @@ -620,6 +622,11 @@ public static boolean isDirectUploadEnabled(String driverId) { || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND)); } + //True by default, Stores (e.g. RemoteOverlay, Globus) can set this false to stop attempts to read bytes + public static boolean isDataverseAccessible(String driverId) { + return (true && !Boolean.parseBoolean(getConfigParamForDriver(driverId, DATAVERSE_INACCESSIBLE))); + } + // Check that storageIdentifier is consistent with store's config // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { From dc4580232dcfe698010cdc4c20fb77c19482484b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:18:05 -0500 Subject: [PATCH 302/546] use correct term (though up and down terms are the same) could also fix for native/http, but not for rsync --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index e40f55fedd8..3c6992f8ec3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -941,7 +941,7 @@ public boolean isHTTPDownload() { } public boolean isGlobusDownload() { - return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); + return getMethodAvailable(FileDownloadMethods.GLOBUS.toString(), false); } public boolean isGlobusFileDownload() { From 0bfbb10c355ea1ebc24d2d8bee928c50ca22db41 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 1 Dec 2023 16:59:38 -0500 Subject: [PATCH 303/546] "manage collections" guide entry. 
#8549 --- .../source/admin/collectionquotas.rst | 17 +++++++++++++++++ doc/sphinx-guides/source/admin/index.rst | 1 + doc/sphinx-guides/source/api/native-api.rst | 12 +++++++++++- .../iq/dataverse/storageuse/StorageUse.java | 3 +++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 doc/sphinx-guides/source/admin/collectionquotas.rst diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst new file mode 100644 index 00000000000..883b6cf0c93 --- /dev/null +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -0,0 +1,17 @@ +Storage Quotas for Collections +============================== + +Please note that this is a new and still experimental feature (as of the Dataverse v6.1 release). + +Instance admins can now define storage quota limits for specific collections. These limits can be set, changed and/or deleted via the provided APIs (please see the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide). The Read version of the API is available to the individual collection admins (i.e., a collection owner can check on the quota configured for their collection), but only superusers can set, change or disable storage quotas. + +Storage quotas are *inherited* by subcollections. In other words, when a storage use limit is set for a specific collection, it applies to all the datasets immediately under it and in its sub-collections, unless different quotas are defined there, and so on. Each file added to any dataset in that hierarchy counts for the purposes of the quota limit defined for the top collection. A storage quota defined on a child sub-collection overrides whatever quota may be defined on the parent or inherited from an ancestor. + +For example, a collection ``A`` has the storage quota set to 10GB. It has 3 sub-collections, ``B``, ``C`` and ``D``. Users can keep uploading files into the datasets anywhere in this hierarchy until the combined size of 10GB is reached between them. However, if an admin has reason to limit one of the sub-collections, ``B``, to 3GB only, that quota can be explicitly set there. This both limits the growth of ``B`` to 3GB and *guarantees* that allocation to it. That is, the contributors to collection ``B`` will be able to keep adding data until the 3GB limit is reached, even after the parent collection ``A`` reaches the combined 10GB limit (at which point ``A`` and all its subcollections except for ``B`` will become read-only). + +We do not yet know whether this is going to be a popular or needed use case - a child collection quota that is different from the quota it would inherit from a parent. It is likely that for many instances it will be sufficient to be able to define quotas for collections and have them apply to all the child objects underneath. We will examine the response to this feature and consider making adjustments to this scheme based on it. We are already considering introducing other types of quotas, such as limits by users or specific storage volumes. + +Please note that only the sizes of the main datafiles and the archival tab-delimited format versions, as produced by the ingest process, are counted for the purposes of enforcing the limits. Automatically generated "auxiliary" files, such as rescaled image thumbnails and metadata exports for datasets, are not. + +When quotas are set and enforced, users will be informed of the remaining storage allocation on the file upload page, together with other upload and processing limits.
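As a rough illustration of the ``A``/``B`` example above, a superuser could configure and inspect such quotas with the API calls documented in the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide. This is only a sketch: the collection aliases ``A`` and ``B``, the server URL and the byte values (10GB and 3GB expressed as bytes) are hypothetical.

.. code-block:: bash

  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
  export SERVER_URL=https://demo.dataverse.org

  # Give the parent collection A a 10GB quota (10 * 2^30 bytes):
  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/A/storage/quota/10737418240"

  # Cap (and guarantee) 3GB for the sub-collection B (3 * 2^30 bytes):
  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/B/storage/quota/3221225472"

  # Check the quota currently configured for B:
  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/B/storage/quota"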
+ diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index ac81aa737a7..633842044b4 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -27,6 +27,7 @@ This guide documents the functionality only available to superusers (such as "da solr-search-index ip-groups mail-groups + collectionquotas monitoring reporting-tools-and-queries maintenance diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 234d5f37232..7bd334f6a95 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -763,7 +763,8 @@ Collection Storage Quotas curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" -Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the collection. +Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the specific collection. The user identified by the API token must have the ``Manage`` permission on the collection. + To set or change the storage allocation quota for a collection: @@ -771,13 +772,22 @@ To set or change the storage allocation quota for a collection: curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota/$SIZE_IN_BYTES" +This is API is superuser-only. + + To delete a storage quota configured for a collection: .. code-block:: curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" +This is API is superuser-only. + +Use the ``/settings`` API to enable or disable the enforcement of storage quotas that are defined across the instance via the following setting. For example, + +.. 
code-block:: + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:UseStorageQuotas Datasets diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 240fba1037d..b777736dc8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -7,10 +7,12 @@ import jakarta.persistence.GenerationType; import jakarta.persistence.GeneratedValue; import jakarta.persistence.Id; +import jakarta.persistence.Index; import jakarta.persistence.JoinColumn; import jakarta.persistence.NamedQueries; import jakarta.persistence.NamedQuery; import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; import java.io.Serializable; /** @@ -23,6 +25,7 @@ @NamedQuery(name = "StorageUse.incrementByteSizeByDvContainerId", query = "UPDATE StorageUse su SET su.sizeInBytes = su.sizeInBytes +:fileSize WHERE su.dvObjectContainer.id =:dvObjectId") }) @Entity +@Table(indexes = {@Index(columnList="dvobjectcontainer_id")}) public class StorageUse implements Serializable { private static final long serialVersionUID = 1L; From 9af23d23d97413338ce2b800697b19970aca3dd5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 17:23:09 -0500 Subject: [PATCH 304/546] add mixed/other dialogs for transfer case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 92 ++++++++++++------- src/main/java/propertyFiles/Bundle.properties | 6 +- src/main/webapp/dataset.xhtml | 48 ++++++++-- src/main/webapp/filesFragment.xhtml | 10 +- 4 files changed, 110 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0b0d0a2e4f5..47a32987b0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -365,6 +365,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { */ private boolean versionHasTabular = false; private boolean versionHasGlobus = false; + private boolean globusTransferRequested = false; private boolean showIngestSuccess; @@ -3116,6 +3117,16 @@ public void setSelectedGlobusTransferableFiles(List selectedGlobus this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; } + private List selectedNonGlobusTransferableFiles; + + public List getSelectedNonGlobusTransferableFiles() { + return selectedNonGlobusTransferableFiles; + } + + public void setSelectedNonGlobusTransferableFiles(List selectedNonGlobusTransferableFiles) { + this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3227,7 +3238,7 @@ private void startDownload(boolean downloadOriginal){ boolean guestbookRequired = isDownloadPopupRequired(); boolean validate = validateFilesForDownload(downloadOriginal); if (validate) { - updateGuestbookResponse(guestbookRequired, downloadOriginal); + updateGuestbookResponse(guestbookRequired, downloadOriginal, false); if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){ startMultipleFileDownload(); } @@ -3289,8 +3300,9 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ return false; } - if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) - && !getSelectedNonDownloadableFiles().isEmpty()) { + //Some are selected and 
there are non-downloadable ones or there are both downloadable and globus transferable files + if ((!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && (!getSelectedNonDownloadableFiles().isEmpty()) || (!getSelectedDownloadableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("Mixed"); return true; } @@ -3302,7 +3314,7 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } - private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) { + private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) { // Note that the GuestbookResponse object may still have information from // the last download action performed by the user. For example, it may // still have the non-null Datafile in it, if the user has just downloaded @@ -3310,7 +3322,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // even if that's not what they are trying to do now. // So make sure to reset these values: guestbookResponse.setDataFile(null); - guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + if(isGlobusTransfer) { + guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); + } else { + guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + } if (downloadOriginal) { guestbookResponse.setFileFormat("original"); } else { @@ -3331,6 +3347,7 @@ private boolean filterSelectedFiles(){ setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); setSelectedGlobusTransferableFiles(new ArrayList<>()); + setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); @@ -3346,11 +3363,14 @@ private boolean filterSelectedFiles(){ if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; - } else if(globusTransferable) { + } else { + getSelectedNonDownloadableFiles().add(fmd); + } + if(globusTransferable) { getSelectedGlobusTransferableFiles().add(fmd); someFiles=true; } else { - getSelectedNonDownloadableFiles().add(fmd); + getSelectedNonGlobusTransferableFiles().add(fmd); } if(fmd.isRestricted()){ getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not @@ -6318,37 +6338,45 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer(boolean transferAll) { - if(transferAll) { + public boolean isGlobusTransferRequested() { + return globusTransferRequested; + } + + public void startGlobusTransfer(boolean transferAll, boolean popupShown) { + if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); } + boolean guestbookRequired = isDownloadPopupRequired(); + boolean validated = validateFilesForDownload(true); if (validated) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if 
(fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); + globusTransferRequested = true; + boolean mixed = "Mixed".equals(getValidateFilesOutcome()); + // transfer is + updateGuestbookResponse(guestbookRequired, true, true); + if ((!guestbookRequired && !mixed) || popupShown) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - // ToDo: For non-public, need the subset that are downloadable by the user - // ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } } + globusTransferRequested = false; } } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 13e3a675a27..65dd020f27b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -64,6 +64,7 @@ manager=Manager curator=Curator explore=Explore download=Download +transfer=Globus Transfer downloadOriginal=Original Format downloadArchival=Archival Format (.tab) deaccession=Deaccession @@ -1391,6 +1392,7 @@ dataset.accessBtn.header.explore=Explore Options dataset.accessBtn.header.configure=Configure Options dataset.accessBtn.header.compute=Compute Options dataset.accessBtn.download.size=ZIP ({0}) +dataset.accessBtn.transfer.size=({0}) dataset.accessBtn.too.big=The dataset is too large to download. Please select the files you need from the files table. dataset.accessBtn.original.too.big=The dataset is too large to download in the original format. Please select the files you need from the files table. dataset.accessBtn.archival.too.big=The dataset is too large to download in the archival format. Please select the files you need from the files table. @@ -1655,8 +1657,10 @@ dataset.inValidSelectedFilesForDownloadWithEmbargo=Embargoed and/or Restricted F dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownload=The restricted file(s) selected may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownloadWithEmbargo=The embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. 
-
+dataset.mixedSelectedFilesForTransfer=Some file(s) cannot be transferred. (They are restricted, embargoed, or not Globus accessible.)
+dataset.inValidSelectedFilesForTransfer=Ineligible Files Selected
 dataset.downloadUnrestricted=Click Continue to download the files you have access to download.
+dataset.transferUnrestricted=Click Continue to transfer the eligible files.
 dataset.requestAccessToRestrictedFiles=You may request access to the restricted file(s) by clicking the Request Access button.
 dataset.requestAccessToRestrictedFilesWithEmbargo=Embargoed files cannot be accessed during the embargo period. If your selection contains restricted files, you may request access to them by clicking the Request Access button.
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 0b8983a7770..e50e68ec162 100644
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -178,7 +178,7 @@
  • + oncomplete="showPopup(false);"> #{bundle.download} @@ -192,7 +192,7 @@
  • #{bundle.downloadOriginal} @@ -208,7 +208,7 @@
  • - #{bundle.downloadArchival} @@ -230,9 +230,14 @@
  • - - - + + #{bundle.transfer} + + + + +
@@ -1095,6 +1100,28 @@
#{bundle['dataset.mixedSelectedFilesForTransfer']}
#{resFile.label}
#{bundle['dataset.transferUnrestricted']}
#{bundle['file.deleteDialog.tip']}
    @@ -1545,6 +1572,7 @@ + @@ -1911,10 +1939,14 @@ $('button[id$="updateOwnerDataverse"]').trigger('click'); } - function showPopup() { + function showPopup(isTransfer) { var outcome = document.getElementById("datasetForm:validateFilesOutcome").value; if (outcome ==='Mixed'){ - PF('downloadMixed').show(); + if(isTransfer) { + PF('globusTransferMixed').show(); + } else { + PF('downloadMixed').show(); + } } if (outcome ==='FailEmpty'){ PF('selectFilesForDownload').show(); diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 3d28e3170f7..58899ab7062 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -442,7 +442,7 @@ disabled="#{false and DatasetPage.lockedFromDownload}" onclick="if (!testFilesSelected()) return false;" action="#{DatasetPage.startDownloadSelectedOriginal()}" - update="@form" oncomplete="showPopup();"> + update="@form" oncomplete="showPopup(false);"> #{bundle.download} @@ -459,7 +459,7 @@
@@ -470,7 +470,7 @@
@@ -481,9 +481,9 @@
  • + actionListener="#{DatasetPage.startGlobusTransfer(false, false)}"> #{bundle['file.globus.transfer']} From 43105d31ae3d5357e450da3a98cac6886e18a1d3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:14:28 -0500 Subject: [PATCH 305/546] refactor, handle guestbook at download case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 29 ++----- .../iq/dataverse/FileDownloadHelper.java | 36 ++++---- .../iq/dataverse/GuestbookResponse.java | 2 +- .../dataverse/api/DownloadInstanceWriter.java | 6 +- .../dataverse/globus/GlobusServiceBean.java | 86 ++++++++++++++++--- .../guestbook-terms-popup-fragment.xhtml | 13 ++- 6 files changed, 115 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 47a32987b0b..830e146fa07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3321,7 +3321,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // a single file; or it may still have the format set to "original" - // even if that's not what they are trying to do now. // So make sure to reset these values: - guestbookResponse.setDataFile(null); + if(fileMetadataForAction == null) { + guestbookResponse.setDataFile(null); + } else { + guestbookResponse.setDataFile(fileMetadataForAction.getDataFile()); + } if(isGlobusTransfer) { guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); } else { @@ -6355,27 +6359,8 @@ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { // transfer is updateGuestbookResponse(guestbookRequired, true, true); if ((!guestbookRequired && !mixed) || popupShown) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if (fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); - } - } + boolean doNotSaveGuestbookResponse = workingVersion.isDraft(); + globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse); globusTransferRequested = false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index a6ae7223d9d..4d8100124ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import 
edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -53,6 +54,9 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; + + @EJB + GlobusServiceBean globusService; private final Map fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean } @@ -60,32 +64,32 @@ public FileDownloadHelper() { this.filesForRequestAccess = new ArrayList<>(); } - // See also @Size(max = 255) in GuestbookResponse - private boolean testResponseLength(String value) { - return !(value != null && value.length() > 255); - } - // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) { + public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); // Note that this method is only ever called from the file-download-popup - // meaning we know for the fact that we DO want to save this // guestbookResponse permanently in the database. - if (guestbookResponse.getSelectedFileIds() != null) { - // this is a batch (multiple file) download. - // Although here's a chance that this is not really a batch download - i.e., - // there may only be one file on the file list. But the fileDownloadService - // method below will check for that, and will redirect to the single download, if - // that's the case. -- L.A. - fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); - } else if (guestbookResponse.getDataFile() != null) { - // this a single file download: - fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + if(isGlobusTransfer) { + globusService.writeGuestbookAndStartTransfer(guestbookResponse, true); + } else { + if (guestbookResponse.getSelectedFileIds() != null) { + // this is a batch (multiple file) download. + // Although here's a chance that this is not really a batch download - i.e., + // there may only be one file on the file list. But the fileDownloadService + // method below will check for that, and will redirect to the single download, + // if + // that's the case. -- L.A. 
+ fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); + } else if (guestbookResponse.getDataFile() != null) { + // this a single file download: + fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 976f1e084ac..9041ccf887c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -99,7 +99,7 @@ public class GuestbookResponse implements Serializable { */ public static final String ACCESS_REQUEST = "AccessRequest"; - static final String DOWNLOAD = "Download"; + public static final String DOWNLOAD = "Download"; static final String SUBSET = "Subset"; static final String EXPLORE = "Explore"; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index cc064976982..bcb8799ec9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -213,9 +213,9 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (di.getConversionParam().equals("format")) { if ("GlobusTransfer".equals(di.getConversionParamValue())) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(dataFile.getFileMetadata()); - redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadFMList); + List downloadDFList = new ArrayList(1); + downloadDFList.add(dataFile); + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d8742fc90d5..0c991424ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -50,15 +50,19 @@ import java.util.stream.IntStream; import org.apache.commons.codec.binary.StringUtils; +import org.primefaces.PrimeFaces; import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -73,21 +77,22 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; - @EJB protected SettingsServiceBean settingsSvc; - @Inject DataverseSession session; - @EJB protected AuthenticationServiceBean authSvc; - @EJB EjbDataverseEngine commandEngine; - @EJB UserNotificationServiceBean userNotificationService; + @EJB + PrivateUrlServiceBean privateUrlService; + @EJB 
+ FileDownloadServiceBean fileDownloadService; + @EJB + DataFileServiceBean dataFileService; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -600,7 +605,7 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List fileMetadataList) { + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -629,10 +634,6 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList = new ArrayList(1); + downloadDFList.add(df); + if (!doNotSaveGuestbookResponse) { + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + PrimeFaces.current() + .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + } else { + //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); + List selectedFiles = new ArrayList(); + for (String idAsString : list) { + try { + Long fileId = Long.parseLong(idAsString); + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } + } catch (NumberFormatException nfe) { + logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + return; + } + + } + if (!selectedFiles.isEmpty()) { + //Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, + selectedFiles)); + } + } + } } diff --git a/src/main/webapp/guestbook-terms-popup-fragment.xhtml b/src/main/webapp/guestbook-terms-popup-fragment.xhtml index 34df0c79390..5948047d845 100644 --- a/src/main/webapp/guestbook-terms-popup-fragment.xhtml +++ b/src/main/webapp/guestbook-terms-popup-fragment.xhtml @@ -274,8 +274,17 @@ + + + + From a76158f5903ec73a78b284de90d6491a7e05bfce Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:35:33 -0500 Subject: [PATCH 306/546] suppress download entry when not accessible, refactor --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../harvard/iq/dataverse/SettingsWrapper.java | 22 +++ .../file-download-button-fragment.xhtml | 6 +- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ++++++++++++++++++ 4 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 830e146fa07..704c1d42228 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3354,7 +3354,7 @@ private boolean filterSelectedFiles(){ setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; - boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); + boolean 
globusDownloadEnabled = settingsWrapper.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 8b7f732d03f..8ab1e87aef2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -6,6 +6,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; @@ -337,6 +339,26 @@ public boolean isGlobusEnabledStorageDriver(String driverId) { return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId)); } + public boolean isDownloadable(FileMetadata fmd) { + boolean downloadable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + return downloadable; + } + + public boolean isGlobusTransferable(FileMetadata fmd) { + boolean globusTransferable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + } + return globusTransferable; + } + + public String getGlobusAppUrl() { if (globusAppUrl == null) { globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 318aab1454e..9c29fd777a1 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -60,7 +60,7 @@ -
+
  • gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); + System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); + + System.out.println( + "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println( + "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); + System.out.println("ValidLocal: " + + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); + + // We can read the storageIdentifier and get the driver + assertTrue(datafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); + // When we get a StorageIO for the file, it is the right type + StorageIO storageIO = DataAccess.getStorageIO(localDatafile); + assertTrue(storageIO instanceof GlobusOverlayAccessIO); + // When we use it, we can get properties like the remote store name + GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; + assertTrue( + globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); + + String location = globusIO.getStorageLocation(); + assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); +/* + // TBD: + // And can get a temporary download URL for the main file + String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + System.out.println(signedURL); + // And the URL starts with the right stuff + assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); + // And the signature is valid + // assertTrue( + // UrlSignerUtil.isValidUrl(signedURL, null, null, + // System.getProperty("dataverse.files.globus.secret-key"))); + // And we get an unsigned URL with the right stuff with no key + System.clearProperty("dataverse.files.globus.secret-key"); + String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); +*/ + // Once we've opened, we can get the file size (only works if the call to Globus + // works) + globusIO.open(DataAccessOption.READ_ACCESS); + assertTrue(globusIO.getSize() > 0); + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId + ".auxobject").toString()); + System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, + baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); + IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), + "Expected getStorageIO() to throw, but it didn't"); + // 'test' is the driverId in the IOException messages + assertTrue(thrown.getMessage().contains("globus")); + + } + + @Test + void testRemoteOverlayIdentifierFormats() throws IOException { + System.clearProperty("dataverse.files.globus.managed"); + datafile.setStorageIdentifier( + "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); + assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); + assertFalse( + DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); + assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + System.setProperty("dataverse.files.globus.managed", "true"); + assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + + } + +} From 93a586727a3c00069699eb47e5ca5ca3ebbf91cf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 17:58:45 -0500 Subject: [PATCH 307/546] remove old testing code --- .../dataaccess/GlobusOverlayAccessIO.java | 46 ----- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ------------------ 2 files changed, 222 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 3e72fa85d35..e825af8cf30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -408,52 +408,6 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException } - public static void main(String[] args) { - System.out.println("Running the main method"); - if (args.length > 0) { - System.out.printf("List of arguments: {}", Arrays.toString(args)); - } - System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); - 
System.setProperty("dataverse.files.globus.managed", "true"); - - System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - System.setProperty("dataverse.files.globus.globus-token", - ""); - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - // logger.info(JvmSettings.BASE_URL.lookup("globus")); - // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); - - try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( - "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - DataFile df = new DataFile(); - Dataset ds = new Dataset(); - ds.setAuthority("10.5072"); - ds.setIdentifier("FK21234"); - df.setOwner(ds); - df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - - @Override public void open(DataAccessOption... option) throws IOException { // TODO Auto-generated method stub diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index bf3bcdbfe8e..e69de29bb2d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -1,176 +0,0 @@ -/* - * Copyright 2018 Forschungszentrum Jülich GmbH - * SPDX-License-Identifier: Apache 2.0 - */ -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DOIServiceBean; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.mocks.MocksFactory; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.util.UrlSignerUtil; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import static org.junit.jupiter.api.Assertions.*; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; -import java.io.IOException; -import java.nio.file.Paths; - -@ExtendWith(MockitoExtension.class) -@MockitoSettings(strictness = Strictness.STRICT_STUBS) -public class GlobusOverlayAccessIOTest { - - @Mock - - private Dataset dataset; - private DataFile datafile; - private DataFile localDatafile; - private String baseStoreId = "182ad2bda2f-c3508e719076"; - private String logoPath = "image002.mrc"; - private String authority = "10.5072"; - private String identifier = "F2ABCDEF"; - - @BeforeEach - public void setUp() { - System.setProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - System.setProperty("dataverse.files.globus." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - - System.setProperty("dataverse.files.globus.globus-token", - "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); - System.setProperty("dataverse.files.globus.remote-store-name", "GlobusEndpoint1"); - System.setProperty("dataverse.files.globus.type", "globus"); - - System.setProperty("dataverse.files.globus.managed", "true"); - - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - - // System.setProperty("dataverse.files.test.type", "remote"); - System.setProperty("dataverse.files.globus.label", "globusTest"); - System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org/resources"); - System.setProperty("dataverse.files.test.base-store", "file"); - System.setProperty("dataverse.files.test.download-redirect", "true"); - System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); - System.setProperty("dataverse.files.globus.secret-key", "12345"); // Real keys should be much longer, more - // random - System.setProperty("dataverse.files.file.type", "file"); - System.setProperty("dataverse.files.file.label", "default"); - datafile = MocksFactory.makeDataFile(); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - datafile.setOwner(dataset); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - - localDatafile = MocksFactory.makeDataFile(); - localDatafile.setOwner(dataset); - localDatafile.setStorageIdentifier("globus://" + baseStoreId); - } - - @AfterEach - public void tearDown() { - System.clearProperty("dataverse.files.test.type"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.base-url"); - System.clearProperty("dataverse.files.test.base-store"); - System.clearProperty("dataverse.files.test.download-redirect"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.remote-store-name"); - System.clearProperty("dataverse.files.test.secret-key"); - System.clearProperty("dataverse.files.file.type"); - System.clearProperty("dataverse.files.file.label"); - } - - @Test - void testGlobusOverlayFiles() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); - System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); - - System.out.println( - "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println( - "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); - System.setProperty("dataverse.files.globus.managed", "true"); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - System.out.println("ValidLocal: " - + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - - // We can read the storageIdentifier and get the driver - 
assertTrue(datafile.getStorageIdentifier() - .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); - // We can get the driver type from it's ID - assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); - // When we get a StorageIO for the file, it is the right type - StorageIO storageIO = DataAccess.getStorageIO(localDatafile); - assertTrue(storageIO instanceof GlobusOverlayAccessIO); - // When we use it, we can get properties like the remote store name - GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; - assertTrue( - globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); - - String location = globusIO.getStorageLocation(); - assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); -/* - // TBD: - // And can get a temporary download URL for the main file - String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - System.out.println(signedURL); - // And the URL starts with the right stuff - assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); - // And the signature is valid - // assertTrue( - // UrlSignerUtil.isValidUrl(signedURL, null, null, - // System.getProperty("dataverse.files.globus.secret-key"))); - // And we get an unsigned URL with the right stuff with no key - System.clearProperty("dataverse.files.globus.secret-key"); - String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); -*/ - // Once we've opened, we can get the file size (only works if the call to Globus - // works) - globusIO.open(DataAccessOption.READ_ACCESS); - assertTrue(globusIO.getSize() > 0); - // If we ask for the path for an aux file, it is correct - System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, - identifier, baseStoreId + ".auxobject").toString()); - System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); - assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, - baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); - IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), - "Expected getStorageIO() to throw, but it didn't"); - // 'test' is the driverId in the IOException messages - assertTrue(thrown.getMessage().contains("globus")); - - } - - @Test - void testRemoteOverlayIdentifierFormats() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); - assertFalse( - DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); - assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - System.setProperty("dataverse.files.globus.managed", "true"); - assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - - } - -} From 1a96c566bccdf32aefeaca89898a3746b146fa08 Mon Sep 17 00:00:00 2001 From: 
Leonid Andreev Date: Sun, 3 Dec 2023 18:57:59 -0500 Subject: [PATCH 308/546] the kill switch for the real-time storageuse updates (just in case) and some related documentation (#8549) --- .../source/admin/collectionquotas.rst | 2 + .../source/installation/config.rst | 5 ++ .../dataverse/ingest/IngestServiceBean.java | 2 +- .../iq/dataverse/settings/JvmSettings.java | 3 + .../storageuse/StorageUseServiceBean.java | 58 ++++++------------- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst index 883b6cf0c93..2ce3132e2ba 100644 --- a/doc/sphinx-guides/source/admin/collectionquotas.rst +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -1,3 +1,4 @@ + Storage Quotas for Collections ============================== @@ -15,3 +16,4 @@ Please note that only the sizes of the main datafiles and the archival tab-delim When quotas are set and enforced, the users will be informed of the remaining storage allocation on the file upload page together with other upload and processing limits. +Part of the new and experimental nature of this feature is that we don't know for the fact yet how well it will function in real life on a very busy production system, despite our best efforts to test it prior to the release. One specific issue is having to update the recorded storage use for every parent collection of the given dataset whenever new files are added. This includes updating the combined size of the root, top collection - which will need to be updated after *every* file upload. In an unlikely case that this will start causing problems with race conditions and database update conflicts, it is possible to disable these updates (and thus disable the storage quotas feature), by setting the :ref:`dataverse.storageuse.disable-storageuse-increments` JVM setting to true. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 52ba35376ac..03eeff9dbb6 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,11 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +dataverse.storageuse.disable-storageuse-increments +++++++++++++++++++++++++++++++++++++++++++++++++++ + +This setting serves the role of an emergency "kill switch" that will disable maintaining the real time record of storage use for all the datasets and collections in the database. Because of the experimental nature of this feature (see :doc:`/admin/collectionquotas`) that hasn't been used in production setting as of this release, v6.1 this setting is provided in case these updates start causing database race conditions and conflicts on a busy server. + dataverse.auth.oidc.* +++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5efb4c06f48..233f746fb17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -206,7 +206,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { - // is it ever "unattached"? + // is it ever "attached"? 
// do we ever call this method with dataFile.getOwner() != null? // - we really shouldn't be, either. unattached = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..7c65bba77d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,9 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // STORAGE USE SETTINGS + SCOPE_STORAGEUSE(PREFIX, "storageuse"), + STORAGEUSE_DISABLE_UPDATES(SCOPE_STORAGEUSE, "disable-storageuse-increments"), ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index b542a7cd661..18e4ef49640 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,12 +1,14 @@ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import java.util.Optional; import java.util.logging.Logger; /** @@ -37,31 +39,6 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { return res == null ? 0L : res; } - public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { - incrementStorageSize(dvObject, filesize); - DvObjectContainer parent = dvObject.getOwner(); - while (parent != null) { - incrementStorageSize(parent, filesize); - parent = parent.getOwner(); - } - } - - /** - * @param dvObject - * @param filesize - */ - public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { - StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); - if (dvContainerSU != null) { - // @todo: named query - dvContainerSU.incrementSizeInBytes(filesize); - em.merge(dvContainerSU); - } else { - dvContainerSU = new StorageUse(dvObject, filesize); - em.persist(dvContainerSU); - } - } - /** * Increments the recorded storage size for all the dvobject parents of a * datafile, recursively. 
@@ -71,20 +48,23 @@ public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { //@todo should throw exceptions if either parameter is null - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; - - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; + + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? } From 0a536da0c42ed9654641985f1fd8dc20b461c16c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 09:46:59 -0500 Subject: [PATCH 309/546] a missing ref in the doc. #8549 --- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 03eeff9dbb6..7cb321708a7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,8 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +.. _dataverse.storageuse.disable-storageuse-increments: + dataverse.storageuse.disable-storageuse-increments ++++++++++++++++++++++++++++++++++++++++++++++++++ From b20f198368615d7d8c4e798a25d6f68a6d0c4ed9 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 11:27:27 -0500 Subject: [PATCH 310/546] Bump version to 6.1 --- doc/sphinx-guides/source/conf.py | 4 ++-- doc/sphinx-guides/source/versions.rst | 3 ++- modules/dataverse-parent/pom.xml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 0660ec3b071..64efc359e9a 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '6.0' +version = '6.1' # The full version, including alpha/beta/rc tags. -release = '6.0' +release = '6.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 2000a2097f0..2cf7f46dc5e 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` -- 6.0 +- 6.1 +- `6.0 `__ - `5.14 `__ - `5.13 `__ - `5.12.1 `__ diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index db0fa46a952..7b305cad581 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -131,7 +131,7 @@ - 6.0 + 6.1 17 UTF-8 From 5f29144762c166c7856958497e24f629d53c92a0 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 12:58:01 -0500 Subject: [PATCH 311/546] adding 6.1 release notes and removing .md files --- ...001-datasets-files-api-user-permissions.md | 13 -- doc/release-notes/10060-api-changelog.md | 3 - .../10093-signedUrl_improvements.md | 5 - .../10104-dataset-citation-deaccessioned.md | 1 - doc/release-notes/6.1-release-notes.md | 195 ++++++++++++++++++ .../9268-8349-oidc-improvements.md | 43 ---- doc/release-notes/9412-markdown-previewer.md | 1 - doc/release-notes/9428-alternative-title.md | 9 - doc/release-notes/9589-ds-configure-tool.md | 1 - doc/release-notes/9590-intellij-redeploy.md | 3 - .../9599-guestbook-at-request.md | 2 - doc/release-notes/9635-solr-improvements.md | 4 - doc/release-notes/9692-files-api-extension.md | 7 - .../9714-files-api-extension-filters.md | 14 -- .../9763-versions-api-improvements.md | 8 - .../9785-files-api-extension-search-text.md | 3 - .../9834-files-api-extension-counts.md | 6 - ...oad-extension-new-file-access-endpoints.md | 14 -- .../9852-files-api-extension-deaccession.md | 12 -- .../9880-info-api-zip-limit-embargo.md | 5 - .../9907-files-api-counts-with-criteria.md | 11 - doc/release-notes/9955-Signposting-updates.md | 7 - ...et-api-downloadsize-ignore-tabular-size.md | 9 - .../9972-files-api-filter-by-tabular-tags.md | 3 - ...with-criteria-and-deaccessioned-support.md | 12 -- 25 files changed, 195 insertions(+), 196 deletions(-) delete mode 100644 doc/release-notes/10001-datasets-files-api-user-permissions.md delete mode 100644 doc/release-notes/10060-api-changelog.md delete mode 100644 doc/release-notes/10093-signedUrl_improvements.md delete mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md create mode 100644 doc/release-notes/6.1-release-notes.md delete mode 100644 doc/release-notes/9268-8349-oidc-improvements.md delete mode 100644 doc/release-notes/9412-markdown-previewer.md delete mode 100644 doc/release-notes/9428-alternative-title.md delete mode 100644 doc/release-notes/9589-ds-configure-tool.md delete mode 100644 doc/release-notes/9590-intellij-redeploy.md delete mode 100644 doc/release-notes/9599-guestbook-at-request.md delete mode 100644 doc/release-notes/9635-solr-improvements.md delete mode 100644 doc/release-notes/9692-files-api-extension.md delete mode 100644 doc/release-notes/9714-files-api-extension-filters.md delete mode 100644 doc/release-notes/9763-versions-api-improvements.md delete mode 100644 doc/release-notes/9785-files-api-extension-search-text.md delete mode 
100644 doc/release-notes/9834-files-api-extension-counts.md delete mode 100644 doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md delete mode 100644 doc/release-notes/9852-files-api-extension-deaccession.md delete mode 100644 doc/release-notes/9880-info-api-zip-limit-embargo.md delete mode 100644 doc/release-notes/9907-files-api-counts-with-criteria.md delete mode 100644 doc/release-notes/9955-Signposting-updates.md delete mode 100644 doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md delete mode 100644 doc/release-notes/9972-files-api-filter-by-tabular-tags.md delete mode 100644 doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md diff --git a/doc/release-notes/10001-datasets-files-api-user-permissions.md b/doc/release-notes/10001-datasets-files-api-user-permissions.md deleted file mode 100644 index 0aa75f9218a..00000000000 --- a/doc/release-notes/10001-datasets-files-api-user-permissions.md +++ /dev/null @@ -1,13 +0,0 @@ -- New query parameter `includeDeaccessioned` added to the getVersion endpoint (/api/datasets/{id}/versions/{versionId}) to consider deaccessioned versions when searching for versions. - - -- New endpoint to get user permissions on a dataset (/api/datasets/{id}/userPermissions). In particular, the user permissions that this API call checks, returned as booleans, are the following: - - - Can view the unpublished dataset - - Can edit the dataset - - Can publish the dataset - - Can manage the dataset permissions - - Can delete the dataset draft - - -- New permission check "canManageFilePermissions" added to the existing endpoint for getting user permissions on a file (/api/access/datafile/{id}/userPermissions). \ No newline at end of file diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md deleted file mode 100644 index 56ac96e3564..00000000000 --- a/doc/release-notes/10060-api-changelog.md +++ /dev/null @@ -1,3 +0,0 @@ -We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html - -See also #10060. diff --git a/doc/release-notes/10093-signedUrl_improvements.md b/doc/release-notes/10093-signedUrl_improvements.md deleted file mode 100644 index 26a17c65e3f..00000000000 --- a/doc/release-notes/10093-signedUrl_improvements.md +++ /dev/null @@ -1,5 +0,0 @@ -A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - -SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - -Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
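The note deleted above (its content is folded into the consolidated 6.1 release notes later in this patch) documents the new dataset user-permissions endpoint, which returns a set of booleans. A minimal sketch of calling it with Java's built-in HTTP client; the host, dataset id, and token are placeholders, and the exact JSON key names are not spelled out in the note:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of GET /api/datasets/{id}/userPermissions as described in the deleted note above.
    // "http://localhost:8080", dataset id 42, and the token value are placeholders.
    public class UserPermissionsCheck {
        public static void main(String[] args) throws Exception {
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/api/datasets/42/userPermissions"))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // The body reports whether the caller can view the unpublished dataset, edit, publish,
            // manage permissions, and delete the draft; just print it here.
            System.out.println(response.statusCode() + " " + response.body());
        }
    }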
diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md deleted file mode 100644 index 0ba06d729c4..00000000000 --- a/doc/release-notes/10104-dataset-citation-deaccessioned.md +++ /dev/null @@ -1 +0,0 @@ -The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md new file mode 100644 index 00000000000..c2b52ab34b8 --- /dev/null +++ b/doc/release-notes/6.1-release-notes.md @@ -0,0 +1,195 @@ +# Dataverse 6.1 + +(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) + +This release brings new features, enhancements, and bug fixes to the Dataverse software. +Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights (Major Upgrades, Breaking Changes) + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +## Detailed Release Highlights, New Features and Use Case Scenarios + +### Dataverse installation can be now be configured to allow out-of-band upload +- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + +### Alternative Title is made repeatable. +- One will need to update database with updated citation block. + `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` +- One will also need to update solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` + Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. 
+For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] + +### Improvements in the /versions API +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +### The following API endpoints have been added: + +- /api/files/{id}/downloadCount +- /api/files/{id}/dataTables +- /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. +- canManageFilePermissions (/access/datafile/{id}/userPermissions) Added for getting user permissions on a file. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). +- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. +- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. +- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). +- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. +- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. + +### Extended the existing endpoints: +- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. 
Added support for filtering by optional criteria query parameter: + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText +- getDownloadSize ("api/datasets/{identifier}/versions/{versionId}/downloadsize"): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. Added a new optional query parameter "mode" +This parameter applies a filter criteria to the operation and supports the following values: + - All (Default): Includes both archival and original sizes for tabular files + - Archival: Includes only the archival size for tabular files + - Original: Includes only the original size for tabular files. +- /api/datasets/{id}/versions/{versionId} New query parameter `includeDeaccessioned` added to consider deaccessioned versions when searching for versions. +- /api/datasets/{id}/userPermissions Get user permissions on a dataset, in particular, the user permissions that this API call checks, returned as booleans, are the following: + - Can view the unpublished dataset + - Can edit the dataset + - Can publish the dataset + - Can manage the dataset permissions + - Can delete the dataset draft +- getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. + + +### DataFile API payload has been extended to include the following fields: +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + + +### Misc +- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + +- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports +(which include the OAI-ORE metadata export file) have been updated to include +information about the dataset version state, e.g. RELEASED or DEACCESSIONED +and to indicate which version of Dataverse was used to create the archival Bag. 
+As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 +version designation and it is expected that any future changes to the OAI_ORE export +format will result in a version change and that tools such as DVUploader that can +recreate datasets from archival Bags will start indicating which version(s) of the +OAI_ORE format they can read. +Dataverse installations that have been using archival Bags may wish to update any +existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse +[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +to generate updated versions. + +- This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. + - To fix #9952, we surround the license info with `<` and `>`. + - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information + - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. + +- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +See also #10060. + +### Solr Improvements +- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. + +Please see the "Installing Solr" section of the Installation Prerequisites guide. + + +### Development +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools + +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews + +- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. + - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. + - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. + +## OpenID Connect Authentication Provider Improvements + +### Using MicroProfile Config For Provisioning + +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
+ +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. + +### Adding PKCE Support + +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +## Improved Testing + +With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests +like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition +of both OIDC login options in UI and API. + +The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. + +The support for setting JVM options during testing has been improved for developers. You now may add the +`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. + +As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. + +## New Configuration Options + +- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.client-id +- dataverse.auth.oidc.client-secret +- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.method +- dataverse.auth.oidc.title +- dataverse.auth.oidc.subtitle +- dataverse.auth.oidc.pkce.max-cache-size +- dataverse.auth.oidc.pkce.max-cache-age + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. 
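Several of the endpoints listed in the new release notes above accept optional query parameters (includeDeaccessioned plus the contentType, accessStatus, categoryName, tabularTagName, and searchText criteria). A minimal sketch of one such call, getVersionFileCounts, using Java's built-in HTTP client; all concrete values are placeholders and the ":latest-published" version label is an assumption:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of GET /api/datasets/{id}/versions/{versionId}/files/counts with two of the optional
    // query parameters named in the notes above. Host, ids, version label, and token are placeholders.
    public class VersionFileCounts {
        public static void main(String[] args) throws Exception {
            String url = "http://localhost:8080/api/datasets/42/versions/:latest-published"
                    + "/files/counts?includeDeaccessioned=true&contentType=image/png";
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body()); // counts per content type, access status, category, ...
        }
    }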
diff --git a/doc/release-notes/9268-8349-oidc-improvements.md b/doc/release-notes/9268-8349-oidc-improvements.md deleted file mode 100644 index ddfc13e603c..00000000000 --- a/doc/release-notes/9268-8349-oidc-improvements.md +++ /dev/null @@ -1,43 +0,0 @@ -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support - -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. - -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options - -- dataverse.auth.oidc.enabled -- dataverse.auth.oidc.client-id -- dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url -- dataverse.auth.oidc.pkce.enabled -- dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title -- dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age diff --git a/doc/release-notes/9412-markdown-previewer.md b/doc/release-notes/9412-markdown-previewer.md deleted file mode 100644 index 8faa2679fb0..00000000000 --- a/doc/release-notes/9412-markdown-previewer.md +++ /dev/null @@ -1 +0,0 @@ -There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md deleted file mode 100644 index 3bc74f218b5..00000000000 --- a/doc/release-notes/9428-alternative-title.md +++ /dev/null @@ -1,9 +0,0 @@ -Alternative Title is made repeatable. -- One will need to update database with updated citation block. 
-`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: -Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` -Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` - -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] diff --git a/doc/release-notes/9589-ds-configure-tool.md b/doc/release-notes/9589-ds-configure-tool.md deleted file mode 100644 index 70ac5fcaa6a..00000000000 --- a/doc/release-notes/9589-ds-configure-tool.md +++ /dev/null @@ -1 +0,0 @@ -Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. diff --git a/doc/release-notes/9590-intellij-redeploy.md b/doc/release-notes/9590-intellij-redeploy.md deleted file mode 100644 index 07af352ece4..00000000000 --- a/doc/release-notes/9590-intellij-redeploy.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. - -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools diff --git a/doc/release-notes/9599-guestbook-at-request.md b/doc/release-notes/9599-guestbook-at-request.md deleted file mode 100644 index e9554b71fb4..00000000000 --- a/doc/release-notes/9599-guestbook-at-request.md +++ /dev/null @@ -1,2 +0,0 @@ -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. diff --git a/doc/release-notes/9635-solr-improvements.md b/doc/release-notes/9635-solr-improvements.md deleted file mode 100644 index ad55ee3afe6..00000000000 --- a/doc/release-notes/9635-solr-improvements.md +++ /dev/null @@ -1,4 +0,0 @@ -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. 
- diff --git a/doc/release-notes/9692-files-api-extension.md b/doc/release-notes/9692-files-api-extension.md deleted file mode 100644 index baa8e2f87cd..00000000000 --- a/doc/release-notes/9692-files-api-extension.md +++ /dev/null @@ -1,7 +0,0 @@ -The following API endpoints have been added: - -- /api/files/{id}/downloadCount -- /api/files/{id}/dataTables -- /access/datafile/{id}/userPermissions - -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination and ordering diff --git a/doc/release-notes/9714-files-api-extension-filters.md b/doc/release-notes/9714-files-api-extension-filters.md deleted file mode 100644 index 034230efe61..00000000000 --- a/doc/release-notes/9714-files-api-extension-filters.md +++ /dev/null @@ -1,14 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by: - -- Access status: through the `accessStatus` query parameter, which supports the following values: - - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic - - -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. - - -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md deleted file mode 100644 index 8d7f6c7a20a..00000000000 --- a/doc/release-notes/9763-versions-api-improvements.md +++ /dev/null @@ -1,8 +0,0 @@ -# Improvements in the /versions API - -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. - -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. - diff --git a/doc/release-notes/9785-files-api-extension-search-text.md b/doc/release-notes/9785-files-api-extension-search-text.md deleted file mode 100644 index fb185e1c7af..00000000000 --- a/doc/release-notes/9785-files-api-extension-search-text.md +++ /dev/null @@ -1,3 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by search text through the `searchText` query parameter. - -The search will be applied to the labels and descriptions of the dataset files. diff --git a/doc/release-notes/9834-files-api-extension-counts.md b/doc/release-notes/9834-files-api-extension-counts.md deleted file mode 100644 index 3ec15d8bd36..00000000000 --- a/doc/release-notes/9834-files-api-extension-counts.md +++ /dev/null @@ -1,6 +0,0 @@ -Implemented the following new endpoints: - -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). 
- - -- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. diff --git a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md b/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md deleted file mode 100644 index f306ae2ab80..00000000000 --- a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md +++ /dev/null @@ -1,14 +0,0 @@ -Implemented the following new endpoints: - -- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - - -- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. - - -In addition, the DataFile API payload has been extended to include the following fields: - -- tabularData: Boolean field to know if the DataFile is of tabular type - - -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) diff --git a/doc/release-notes/9852-files-api-extension-deaccession.md b/doc/release-notes/9852-files-api-extension-deaccession.md deleted file mode 100644 index 55698580e3c..00000000000 --- a/doc/release-notes/9852-files-api-extension-deaccession.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the existing endpoints: - -- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts) - -The above endpoints now accept a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files or file counts. - -Additionally, a new endpoint has been developed to support version deaccessioning through API (Given a dataset and a version). - -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession) - -Finally, the DataFile API payload has been extended to add the field "friendlyType" diff --git a/doc/release-notes/9880-info-api-zip-limit-embargo.md b/doc/release-notes/9880-info-api-zip-limit-embargo.md deleted file mode 100644 index d2afb139e72..00000000000 --- a/doc/release-notes/9880-info-api-zip-limit-embargo.md +++ /dev/null @@ -1,5 +0,0 @@ -Implemented the following new endpoints: - -- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - -- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. diff --git a/doc/release-notes/9907-files-api-counts-with-criteria.md b/doc/release-notes/9907-files-api-counts-with-criteria.md deleted file mode 100644 index 07cd23daad0..00000000000 --- a/doc/release-notes/9907-files-api-counts-with-criteria.md +++ /dev/null @@ -1,11 +0,0 @@ -Extended the getVersionFileCounts endpoint (/api/datasets/{id}/versions/{versionId}/files/counts) to support filtering by criteria. 
- -In particular, the endpoint now accepts the following optional criteria query parameters: - -- contentType -- accessStatus -- categoryName -- tabularTagName -- searchText - -This filtering criteria is the same as the one for the getVersionFiles endpoint. diff --git a/doc/release-notes/9955-Signposting-updates.md b/doc/release-notes/9955-Signposting-updates.md deleted file mode 100644 index db0e27e51c5..00000000000 --- a/doc/release-notes/9955-Signposting-updates.md +++ /dev/null @@ -1,7 +0,0 @@ -This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - -To fix #9952, we surround the license info with `<` and `>`. - -To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - -To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. diff --git a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md b/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md deleted file mode 100644 index 2ede679b361..00000000000 --- a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md +++ /dev/null @@ -1,9 +0,0 @@ -Added a new optional query parameter "mode" to the "getDownloadSize" API endpoint ("api/datasets/{identifier}/versions/{versionId}/downloadsize"). - -This parameter applies a filter criteria to the operation and supports the following values: - -- All (Default): Includes both archival and original sizes for tabular files - -- Archival: Includes only the archival size for tabular files - -- Original: Includes only the original size for tabular files diff --git a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md b/doc/release-notes/9972-files-api-filter-by-tabular-tags.md deleted file mode 100644 index 9c3fced1741..00000000000 --- a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md +++ /dev/null @@ -1,3 +0,0 @@ -- New query parameter `tabularTagName` added to the getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) to return files to which the particular tabular tag has been added. - -- New endpoint to set tabular file tags via API: /api/files/{id}/metadata/tabularTags. diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md deleted file mode 100644 index 020224b2094..00000000000 --- a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: - -- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. - - -- The endpoint now supports filtering by criteria. 
In particular, it accepts the following optional criteria query parameters: - - - contentType - - accessStatus - - categoryName - - tabularTagName - - searchText From b077d98a11e6957085757c54c48030ef33b50c30 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 13:30:03 -0500 Subject: [PATCH 312/546] doc update, release note --- doc/release-notes/10162-globus-support.md | 14 ++++++++++++++ .../source/developers/big-data-support.rst | 7 +++++-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md new file mode 100644 index 00000000000..d64e72b70a1 --- /dev/null +++ b/doc/release-notes/10162-globus-support.md @@ -0,0 +1,14 @@ +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward Incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM Options: +- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index d38f7f27a68..fe49f9f6150 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -152,8 +152,6 @@ Note: Globus file transfer is still experimental but feedback is welcome! See :r Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured. 
-Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. - Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). @@ -162,11 +160,16 @@ Globus transfer uses an efficient transfer mechanism and has additional features * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation +Note: Due to differences in the access control models of a Dataverse installation and Globus and the current Globus store model, Dataverse cannot enforce per-file-access restrictions. +It is therefore recommended that a store be configured as public, which disables the ability to restrict and embargo files in that store, when Globus access is allowed. + Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) + For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access).
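The Datasets API changes later in this patch series describe the transfer-in flow behind the managed cases above: the dataverse-globus app first fetches upload parameters, then asks Dataverse to grant a Globus principal access and hand back storage paths, and finally reports the Globus task id via addGlobusFiles so Dataverse can monitor the transfer and register the files. A minimal sketch of the middle call; the "principal" and "numberOfFiles" keys come from that commit's javadoc, while the URL path (following the requestGlobusUploadPaths naming used there), host, ids, and token are assumptions/placeholders to verify against the guides:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of the "request upload paths / grant permission" step of the Globus upload flow.
    // The endpoint path is an assumption based on the requestGlobusUploadPaths naming in the
    // javadoc; the principal id, dataset id, host, and token are placeholders.
    public class RequestGlobusUpload {
        public static void main(String[] args) throws Exception {
            String body = "{\"principal\":\"globus-identity-id-placeholder\",\"numberOfFiles\":2}";
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/api/datasets/42/requestGlobusUploadPaths"))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // The response maps the planned files to endpoint/path locations; after the Globus
            // transfer is started, addGlobusFiles is called with the resulting taskIdentifier.
            System.out.println(response.body());
        }
    }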
From 547d71c342e08ebdf674d8754dc072465ad20651 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 14:31:07 -0500 Subject: [PATCH 313/546] #9464 add more detail to validation error message --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index ed46caf65a1..027e58d9263 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1072,7 +1072,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From fc3ae08ec9335ac857af4d9c112e892255ef1c7a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 14:44:00 -0500 Subject: [PATCH 314/546] adding documentation --- .../edu/harvard/iq/dataverse/DatasetPage.java | 21 ++ .../harvard/iq/dataverse/api/Datasets.java | 238 +++++++++++------- 2 files changed, 163 insertions(+), 96 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 704c1d42228..f871d2e5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6346,6 +6346,27 @@ public boolean isGlobusTransferRequested() { return globusTransferRequested; } + /** + * Analagous with the startDownload method, this method is called when the user + * tries to start a Globus transfer out (~download). The + * validateFilesForDownload call checks to see if there are some files that can + * be Globus transfered and, if so and there are no files that can't be + * transferre, this method will launch the globus transfer app. If there is a + * mix of files or if the guestbook popup is required, the method passes back to + * the UI so those popup(s) can be shown. Once they are, this method is called + * with the popupShown param true and the app will be shown. + * + * @param transferAll - when called from the dataset Access menu, this should be + * true so that all files are included in the processing. + * When it is called from the file table, the current + * selection is used and the param should be false. + * @param popupShown - This method is called twice if the the mixed files or + * guestbook popups are needed. On the first call, popupShown + * is false so that the transfer is not started and those + * popups can be shown. 
On the second call, popupShown is + * true and processing will occur as long as there are some + * valid files to transfer. + */ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 5961b428bcb..ae576134be3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3444,90 +3444,34 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" } - @POST - @AuthRequired - @Path("{id}/addGlobusFiles") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, - @PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo - ) throws IOException, ExecutionException, InterruptedException { - - logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - AuthenticatedUser authUser; - try { - authUser = getRequestAuthenticatedUserOrDie(crc); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // ------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(datasetId); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - JsonObject jsonObject = null; - try { - jsonObject = JsonUtil.getJsonObject(jsonData); - } catch (Exception ex) { - logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); - return badRequest("Error parsing json body"); - - } - - //------------------------------------ - // (2b) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - - String lockInfoMessage = "Globus Upload API started "; - DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, - (authUser).getId(), lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - - - ApiToken token = authSvc.findApiTokenByUser(authUser); - - if(uriInfo != null) { - logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); - } - - - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); - - // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); - - return ok("Async call to Globus Upload started "); - - } +/**************************** + * Globus Support Section: + * + * Globus transfer in (upload) and out (download) involve three basic steps: The + * app is launched and makes a callback to the + * globusUploadParameters/globusDownloadParameters method to get all of the info + * needed to set 
up it's display. + * + * At some point after that, the user will make a selection as to which files to + * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload + * to indicate a transfer is about to start. In addition to providing the + * details of where to transfer the files to/from, Dataverse also grants the + * Globus principal involved the relevant rw or r permission for the dataset. + * + * Once the transfer is started, the app records the task id and sends it to + * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then + * monitors the transfer task and when it ultimately succeeds for fails it + * revokes the principal's permission and, for the transfer in case, adds the + * files to the dataset. (The dataset is locked until the transfer completes.) + * + * (If no transfer is started within a specified timeout, permissions will + * automatically be revoked - see the GlobusServiceBean for details.) + * + * The option to reference a file at a remote endpoint (rather than transfer it) + * follows the first two steps of the process above but completes with a call to + * the normal /addFiles endpoint (as there is no transfer to monitor and the + * files can be added to the dataset immediately.) + */ /** * Retrieve the parameters and signed URLs required to perform a globus @@ -3630,11 +3574,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } /** - * Requests permissions for a given globus user to upload to the dataset + * Provides specific storageIdentifiers to use for each file amd requests permissions for a given globus user to upload to the dataset * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred. * @return * @throws IOException * @throws ExecutionException @@ -3721,15 +3665,114 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } - /** - * Retrieve the parameters and signed URLs required to perform a globus - * transfer/download. This api endpoint is expected to be called as a signed - * callback after the globus-dataverse app/other app is launched, but it will - * accept other forms of authentication. + /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor * * @param crc * @param datasetId + * @param jsonData - see /addFiles documentation, aditional "taskIdentifier" key in the main object is required. 
+ * @param uriInfo + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException */ + @POST + @AuthRequired + @Path("{id}/addGlobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData, + @Context UriInfo uriInfo + ) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } + + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + String lockInfoMessage = "Globus Upload API started "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + (authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + ApiToken token = authSvc.findApiTokenByUser(authUser); + + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + + // Async Call + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); + + } + +/** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer/download. This api endpoint is expected to be called as a signed + * callback after the globus-dataverse app/other app is launched, but it will + * accept other forms of authentication. + * + * @param crc + * @param datasetId + * @param locale + * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL. + * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools. 
+ */ @GET @AuthRequired @Path("{id}/globusDownloadParameters") @@ -3815,12 +3858,14 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P /** * Requests permissions for a given globus user to download the specified files - * the dataset + * the dataset and returns information about the paths to transfer from. + * + * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer. * * @param crc * @param datasetId - * @param jsonData - * @return + * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required. + * @return - a JSON object containing a map of file ids to Globus endpoint/path * @throws IOException * @throws ExecutionException * @throws InterruptedException @@ -3957,11 +4002,12 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat /** * Monitors a globus download and removes permissions on the dir/dataset when - * done + * the specified transfer task is completed. * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - a JSON Object containing the key "taskIdentifier" with the + * Globus task to monitor. * @return * @throws IOException * @throws ExecutionException From 7697157ac98049dea45a2bd98193aad75e6037e1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 15:27:21 -0500 Subject: [PATCH 315/546] #9464 handle single errors --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 027e58d9263..07e7fe615e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1077,7 +1077,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { accumulatedexceptions = accumulatedexceptions + va; accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); } - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From 8ec61d084a81c7d5786bd583177b80255aa7e883 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 15:58:21 -0500 Subject: [PATCH 316/546] cleanup, add method stubs, open for basestore, info->fine --- .../AbstractRemoteOverlayAccessIO.java | 12 +- .../dataaccess/GlobusAccessibleStore.java | 6 + .../dataaccess/GlobusOverlayAccessIO.java | 142 ++++++++++++------ 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 16defc26a4f..8d058b7c9e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -15,11 +15,8 @@ import javax.net.ssl.SSLContext; -import org.apache.http.Header; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpHead; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; @@ -30,15 +27,18 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.protocol.HTTP; import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.util.EntityUtils; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; + +/** + * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes + * common methods for handling auxiliary files in the configured base store. + * @param + */ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index d827e40e807..e4d062f0619 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -6,7 +6,13 @@ public interface GlobusAccessibleStore { + //Whether Dataverse manages access controls for the Globus endpoint or not. 
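+ // When a store is "managed", Dataverse itself creates (and later revokes) the Globus access
+ // rules needed for each transfer; when it is not, the store only references files that live on
+ // a remote, externally controlled Globus endpoint.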
static final String MANAGED = "managed"; + /* + * transfer and reference endpoint formats: + * + * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma + */ static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index e825af8cf30..7a6809cb2ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -2,12 +2,15 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -16,6 +19,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.List; import java.util.logging.Logger; import org.apache.http.client.ClientProtocolException; @@ -32,9 +36,18 @@ /** * @author qqmyers - */ + * + * This class implements three related use cases, all of which leverage the underlying idea of using a base store (as with the Https RemoteOverlay store): + * Managed - where Dataverse has control of the specified Globus endpoint and can set/remove permissions as needed to allow file transfers in/out: + * File/generic endpoint - assumes Dataverse does not have access to the datafile contents + * S3-Connector endpoint - assumes the datafiles are accessible via Globus and via S3 such that Dataverse can access to the datafile contents when needed. + * Remote - where Dataverse references files that remain at remote Globus endpoints (as with the Https RemoteOverlay store) and cannot access to the datafile contents. + * + * Note that Globus endpoints can provide Http URLs to get file contents, so a future enhancement could potentially support datafile contents access in the Managed/File and Remote cases. + * + * */ /* - * Globus Overlay Driver + * Globus Overlay Driver storageIdentifer format: * * Remote: StorageIdentifier format: * ://// @@ -47,11 +60,6 @@ * * Storage location: * /// - * - * transfer and reference endpoint formats: - * - * - * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -115,7 +123,6 @@ private String retrieveGlobusAccessToken() { return accessToken.getOtherTokens().get(0).getAccessToken(); } - private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; String endpointWithBasePath = null; @@ -126,9 +133,9 @@ private void parsePath() { } //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.fine("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("endpointPath: " + endpointPath); + logger.fine("endpointPath: " + endpointPath); if (isManaged() && (dvObject!=null)) { @@ -146,7 +153,7 @@ private void parsePath() { if (filenameStart > 0) { relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); } - logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath); filename = path.substring(filenameStart); endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; @@ -171,7 +178,7 @@ protected void validatePath(String relPath) throws IOException { } else { try { String endpoint = findMatchingEndpoint(relPath, allowedEndpoints); - logger.info(endpoint + " " + relPath); + logger.fine(endpoint + " " + relPath); if (endpoint == null || !Paths.get(endpoint, relPath).normalize().startsWith(endpoint)) { throw new IOException( @@ -189,7 +196,6 @@ protected void validatePath(String relPath) throws IOException { public long retrieveSizeFromMedia() { parsePath(); String globusAccessToken = retrieveGlobusAccessToken(); - logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -198,13 +204,12 @@ public long retrieveSizeFromMedia() { + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); JsonArray dataArray = responseJson.getJsonArray("DATA"); if (dataArray != null && dataArray.size() != 0) { @@ -214,7 +219,7 @@ public long retrieveSizeFromMedia() { } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { // Should have been caught in validatePath @@ -258,16 +263,15 @@ public void delete() throws IOException { absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); String submissionId = responseJson.getString("value"); - logger.info("submission_id for delete is: " + submissionId); + logger.fine("submission_id for delete is: " + submissionId); absoluteURI = new 
URI("https://transfer.api.globusonline.org/v0.10/delete"); HttpPost post = new HttpPost(absoluteURI); JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); @@ -277,30 +281,30 @@ public void delete() throws IOException { post.setHeader("Content-Type", "application/json"); post.addHeader("Authorization", "Bearer " + globusAccessToken); String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); - logger.info("Sending: " + taskJson); + logger.fine("Sending: " + taskJson); post.setEntity(new StringEntity(taskJson, "utf-8")); CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); int statusCode=postResponse.getStatusLine().getStatusCode(); - logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + logger.fine("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); switch (statusCode) { case 202: // ~Success - delete task was accepted - logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + logger.fine("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); break; case 200: // Duplicate - delete task was already accepted - logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + logger.warning("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); break; default: logger.warning("Response from " + post.getURI().toString() + " was " + postResponse.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(postResponse.getEntity())); + logger.fine(EntityUtils.toString(postResponse.getEntity())); } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -383,7 +387,7 @@ public String getStorageLocation() throws IOException { */ protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); - logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); + logger.fine("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } private static String[] getAllowedEndpoints(String driverId) throws IOException { @@ -409,37 +413,91 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException @Override - public void open(DataAccessOption... option) throws IOException { - // TODO Auto-generated method stub - - } + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(retrieveSizeFromMedia()); + } + // Only applies for the S3 Connector case (where we could have run an ingest) + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } + } @Override public Path getFileSystemPath() throws IOException { - // TODO Auto-generated method stub - return null; + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void savePath(Path fileSystemPath) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - + } From 38c120e13d2e1276324b903be58306520168b577 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:21:41 -0500 Subject: [PATCH 317/546] cleanup, delete obsolete methods, change to private, info->fine --- .../harvard/iq/dataverse/api/Datasets.java | 4 +- .../dataverse/globus/GlobusServiceBean.java | 461 +++--------------- .../iq/dataverse/settings/JvmSettings.java | 2 +- src/main/webapp/globus.xhtml | 30 -- 4 files changed, 78 insertions(+), 419 deletions(-) delete mode 100644 src/main/webapp/globus.xhtml diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ae576134be3..cb57acd3b86 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3543,7 +3543,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } else { params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths); } - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); String requestCallName = managed ? "requestGlobusTransferPaths" : "requestGlobusReferencePaths"; allowedApiCalls.add( @@ -3833,7 +3833,7 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P params.add("endpoint", transferEndpoint); } params.add("files", files); - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload") .add(URLTokenUtil.HTTP_METHOD, "POST") diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 0c991424ce9..37959188857 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -97,34 +97,6 @@ public class GlobusServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private String code; - private String userTransferToken; - private String state; - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUserTransferToken() { - return userTransferToken; - } - - public void setUserTransferToken(String userTransferToken) { - this.userTransferToken = userTransferToken; - } - private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { @@ -152,33 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi return null; } - /* - * public void updatePermision(AccessToken clientTokenUser, String directory, - * String principalType, String perm) throws MalformedURLException { if - * (directory != null && !directory.equals("")) { directory = directory + "/"; } - * logger.info("Start updating permissions." 
+ " Directory is " + directory); - * String globusEndpoint = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - * ArrayList rules = checkPermisions(clientTokenUser, directory, - * globusEndpoint, principalType, null); logger.info("Size of rules " + - * rules.size()); int count = 0; while (count < rules.size()) { - * logger.info("Start removing rules " + rules.get(count)); Permissions - * permissions = new Permissions(); permissions.setDATA_TYPE("access"); - * permissions.setPermissions(perm); permissions.setPath(directory); - * - * Gson gson = new GsonBuilder().create(); URL url = new - * URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/access/" + rules.get(count)); - * logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + - * globusEndpoint + "/access/" + rules.get(count)); MakeRequestResponse result = - * makeRequest(url, "Bearer", - * clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", - * gson.toJson(permissions)); if (result.status != 200) { - * logger.warning("Cannot update access rule " + rules.get(count)); } else { - * logger.info("Access rule " + rules.get(count) + " was updated"); } count++; } - * } - */ - /** * Call to delete a globus rule related to the specified dataset. * @@ -214,6 +159,13 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } + /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + * + * @param principal - the id of the Globus principal doing the transfer + * @param dataset + * @param numberOfPaths - how many files are to be transferred + * @return + */ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); @@ -278,6 +230,12 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } + /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + * + * @param dataset + * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @return - a map of supplied paths to valid storage identifiers + */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { String driverId = dataset.getEffectiveStorageDriverId(); JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); @@ -304,39 +262,38 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } + + /** A cache of temporary permission requests - for upload (rw) and download (r) access. + * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. + * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) 
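+ *
+ * (Implementation note: the cache below relies on Caffeine's expireAfterWrite together with an
+ * evictionListener, so the expiry of an entry is what triggers deletePermission for that rule.)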
+ */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire - logger.info("Rule " + ruleId + " expired"); + logger.fine("Rule " + ruleId + " expired"); Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, logger); }) .build(); + //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { - logger.info("Adding rule " + ruleId + " for dataset " + datasetId); + logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId - + "/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - - if (result.status == 200) { - logger.info(" SUCCESS ====== "); - return true; - } - return false; - } - +/** Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -356,6 +313,11 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } + /** Globus call to get an access token for the user using the long-term token we hold. 
+ * + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @return - a valid Globus access token + */ public static AccessToken getClientToken(String globusBasicToken) { URL url; AccessToken clientTokenUser = null; @@ -375,36 +337,7 @@ public static AccessToken getClientToken(String globusBasicToken) { return clientTokenUser; } - public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) - throws UnsupportedEncodingException, MalformedURLException { - String serverName = origRequest.getServerName(); - if (serverName.equals("localhost")) { - logger.severe("Changing localhost to utoronto"); - serverName = "utl-192-123.library.utoronto.ca"; - } - - String redirectURL = "https://" + serverName + "/globus.xhtml"; - - redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); - - URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL - + "&grant_type=authorization_code"); - logger.info(url.toString()); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); - AccessToken accessTokenUser = null; - - if (result.status == 200) { - logger.info("Access Token: \n" + result.toString()); - accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - logger.info(accessTokenUser.getAccessToken()); - } - - return accessTokenUser; - - } - - public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -412,9 +345,8 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a try { connection = (HttpURLConnection) url.openConnection(); // Basic - // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); - logger.info("For URL: " + url.toString()); + logger.fine("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -422,7 +354,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); connection.setRequestProperty("Accept", "application/json"); - logger.info(jsonString); + logger.fine(jsonString); connection.setDoOutput(true); OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); @@ -431,24 +363,21 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a } status = connection.getResponseCode(); - logger.info("Status now " + status); + logger.fine("Status now " + status); InputStream result = connection.getInputStream(); if (result != null) { - logger.info("Result is not null"); str = readResultJson(result).toString(); - logger.info("str is "); - logger.info(result.toString()); + logger.fine("str is " + result.toString()); } else { - logger.info("Result is null"); + logger.fine("Result is null"); str = null; } - logger.info("status: " + status); + logger.fine("status: " + status); } catch (IOException ex) { - logger.info("IO"); logger.severe(ex.getMessage()); - logger.info(ex.getCause().toString()); - logger.info(ex.getStackTrace().toString()); + logger.fine(ex.getCause().toString()); + 
logger.fine(ex.getStackTrace().toString()); } finally { if (connection != null) { connection.disconnect(); @@ -461,16 +390,14 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line + "\n"); } br.close(); - logger.info(sb.toString()); + logger.fine(sb.toString()); } catch (IOException e) { sb = null; logger.severe(e.getMessage()); @@ -495,31 +422,6 @@ private static T parseJson(String sb, Class jsonParserClass, boolean nami } } - public String getDirectory(String datasetId) { - Dataset dataset = null; - String directory = null; - try { - dataset = datasetSvc.find(Long.parseLong(datasetId)); - if (dataset == null) { - logger.severe("Dataset not found " + datasetId); - return null; - } - String storeId = dataset.getStorageIdentifier(); - storeId.substring(storeId.indexOf("//") + 1); - directory = storeId.substring(storeId.indexOf("//") + 1); - logger.info(storeId); - logger.info(directory); - logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); - return directory; - - } catch (NumberFormatException nfe) { - logger.severe(nfe.getMessage()); - - return null; - } - - } - static class MakeRequestResponse { public String jsonResponse; public int status; @@ -531,53 +433,26 @@ static class MakeRequestResponse { } - /* - * unused - may be needed for S3 case private MakeRequestResponse - * findDirectory(String directory, String clientToken, String globusEndpoint) - * throws MalformedURLException { URL url = new - * URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/ls?path=" + directory + "/"); - * - * MakeRequestResponse result = makeRequest(url, "Bearer", clientToken, "GET", - * null); logger.info("find directory status:" + result.status); - * - * return result; } - */ - /* - * public boolean giveGlobusPublicPermissions(Dataset dataset) throws - * UnsupportedEncodingException, MalformedURLException { - * - * GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - * - * - * MakeRequestResponse status = findDirectory(endpoint.getBasePath(), - * endpoint.getClientToken(), endpoint.getId()); - * - * if (status.status == 200) { - * - * int perStatus = givePermission("all_authenticated_users", "", "r", dataset); - * logger.info("givePermission status " + perStatus); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached"); } else if - * (perStatus == 400) { logger.info("No directory in Globus"); } else if - * (perStatus != 201 && perStatus != 200) { - * logger.info("Cannot give read permission"); return false; } + /** + * Cache of open download Requests This cache keeps track of the set of files + * selected for transfer out (download) via Globus. It is a means of + * transferring the list from the DatasetPage, where it is generated via user UI + * actions, and the Datasets/globusDownloadParameters API. * - * } else if (status.status == 404) { - * logger.info("There is no globus directory"); } else { - * logger.severe("Cannot find directory in globus, status " + status); return - * false; } + * Nominally, the dataverse-globus app will call that API endpoint and then + * /requestGlobusDownload, at which point the cached info is sent to the app. 
If + * the app doesn't call within 5 minutes (the time allowed to call + * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period + * giving the user time to make choices in the app), the cached info is deleted. * - * return true; } */ - - // Single cache of open rules/permission requests private final Cache downloadCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> { // Delete downloads that expire - logger.info("Download for " + downloadId + " expired"); + logger.fine("Download for " + downloadId + " expired"); }) .build(); @@ -600,11 +475,18 @@ public int setPermissionForDownload(Dataset dataset, String principal) { return requestPermission(endpoint, dataset, permissions); } - // Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app for upload public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } + /** Generated the App URl for upload (in) or download (out) + * + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download + * @param dataFiles - a list of the DataFiles to be downloaded + * @return + */ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; @@ -654,7 +536,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles, Dataset d) { return filesBuilder.build(); } - public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { + private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); } @@ -718,7 +600,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - logger.info("Found rule: " + ruleId); + logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); if (datasetId != null) { @@ -812,8 +694,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.info("Size: " + newfilesJsonArray.size()); - logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -1227,198 +1109,8 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } - /* - * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) - * throws MalformedURLException { - * - * logger.info("=====Tasklist == dataset id :" + 
dataset.getId()); String - * directory = null; - * - * try { - * - * List fileMetadatas = new ArrayList<>(); - * - * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - * - * - * - * DatasetVersion workingVersion = dataset.getEditVersion(); - * - * if (workingVersion.getCreateTime() != null) { - * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - * - * directory = dataset.getAuthorityForFileStorage() + "/" + - * dataset.getIdentifierForFileStorage(); - * - * System.out.println("======= directory ==== " + directory + - * " ==== datasetId :" + dataset.getId()); Map checksumMapOld - * = new HashMap<>(); - * - * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - * - * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() - * != null && fm.getDataFile().getId() != null) { String chksum = - * fm.getDataFile().getChecksumValue(); if (chksum != null) { - * checksumMapOld.put(chksum, 1); } } } - * - * List dFileList = new ArrayList<>(); boolean update = false; for - * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - * - * String s3ObjectKey = s3ObjectSummary.getKey(); - * - * - * String t = s3ObjectKey.replace(directory, ""); - * - * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String - * filePath = s3ObjectKey; String fileName = - * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = - * datasetSIO.getStorageLocation() + "/" + fileName; - * - * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = - * DataAccess.getDirectStorageIO(fullPath); InputStream in = - * dataFileStorageIO.getInputStream(); - * - * String checksumVal = FileUtil.calculateChecksum(in, - * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - * logger.info("The checksum is " + checksumVal); if - * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + - * dataset.getId() + "======= filename ==== " + filePath + - * " == file already exists "); } else if (filePath.contains("cached") || - * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else - * { update = true; logger.info("datasetId :" + dataset.getId() + - * "======= filename ==== " + filePath + " == new file "); try { - * - * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); - * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new - * Date().getTime())); datafile.setCreateDate(new Timestamp(new - * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new - * Date().getTime())); - * - * FileMetadata fmd = new FileMetadata(); - * - * - * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, - * "").replace(File.separator + fileName, "")); - * - * fmd.setDataFile(datafile); - * - * datafile.getFileMetadatas().add(fmd); - * - * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " - * + filePath + " == added to datafile, filemetadata "); - * - * try { // We persist "SHA1" rather than "SHA-1". 
- * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - * datafile.setChecksumType(DataFile.ChecksumType.MD5); - * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { - * logger.info("==== datasetId :" + dataset.getId() + - * "======Could not calculate checksumType signature for the new file "); } - * - * datafile.setFilesize(totalSize); - * - * dFileList.add(datafile); - * - * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + - * "======Failed to process and/or save the file " + ioex.getMessage()); return - * false; - * - * } } } } if (update) { - * - * List filesAdded = new ArrayList<>(); - * - * if (dFileList != null && dFileList.size() > 0) { - * - * // Dataset dataset = version.getDataset(); - * - * for (DataFile dataFile : dFileList) { - * - * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); - * - * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - * dataFile.getFileMetadata().setDatasetVersion(workingVersion); - * dataset.getFiles().add(dataFile); - * - * } - * - * filesAdded.add(dataFile); - * - * } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ===== Done! Finished saving new files to the dataset."); } - * - * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { - * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; - * - * if (workingVersion.isDraft()) { - * - * logger.info("Async: ==== datasetId :" + dataset.getId() + - * " ==== inside draft version "); - * - * Timestamp updateTime = new Timestamp(new Date().getTime()); - * - * workingVersion.setLastUpdateTime(updateTime); - * dataset.setModificationTime(updateTime); - * - * - * for (FileMetadata fileMetadata : fileMetadatas) { - * - * if (fileMetadata.getDataFile().getCreateDate() == null) { - * fileMetadata.getDataFile().setCreateDate(updateTime); - * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } - * fileMetadata.getDataFile().setModificationTime(updateTime); } - * - * - * } else { logger.info("datasetId :" + dataset.getId() + - * " ==== inside released version "); - * - * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for - * (FileMetadata fileMetadata : fileMetadatas) { if - * (fileMetadata.getDataFile().getStorageIdentifier() != null) { - * - * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. 
- * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } - * - * - * } - * - * - * try { Command cmd; logger.info("Async: ==== datasetId :" + - * dataset.getId() + - * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new - * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, - * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) - * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, - * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() - * commandEngine.submit(cmd); } catch (CommandException ex) { - * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + - * "======CommandException updating DatasetVersion from batch job: " + - * ex.getMessage()); return false; } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - * - * //return true; } - * - * } catch (Exception e) { String message = e.getMessage(); - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL Exception ============== " + message); - * e.printStackTrace(); return false; //return - * error(Response.Status.INTERNAL_SERVER_ERROR, - * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" - * + message + "'."); } - * - * String globusBasicToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - * AccessToken clientTokenUser = getClientToken(globusBasicToken); - * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } - * - */ - GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { Dataset dataset = null; if (dvObject instanceof Dataset) { dataset = (Dataset) dvObject; @@ -1435,8 +1127,6 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { if (GlobusAccessibleStore.isDataverseManaged(driverId) && (dataset != null)) { directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - logger.info("directoryPath now: " + directoryPath); - } else { // remote store - may have path in file storageidentifier String relPath = dvObject.getStorageIdentifier() @@ -1446,17 +1136,16 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { directoryPath = directoryPath + relPath.substring(0, filenameStart); } } - logger.info("directoryPath finally: " + directoryPath); + logger.fine("directoryPath finally: " + directoryPath); String endpointId = GlobusAccessibleStore.getTransferEndpointId(driverId); - logger.info("endpointId: " + endpointId); + logger.fine("endpointId: " + endpointId); String globusToken = GlobusAccessibleStore.getGlobusToken(driverId); AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; @@ -1484,7 +1173,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, DataFile df = guestbookResponse.getDataFile(); if (df != null) { - logger.info("Single datafile case for writeGuestbookAndStartTransfer"); + logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); List downloadDFList = new ArrayList(1); downloadDFList.add(df); if 
(!doNotSaveGuestbookResponse) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index c9038047611..96a56d09c0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), diff --git a/src/main/webapp/globus.xhtml b/src/main/webapp/globus.xhtml deleted file mode 100644 index f4eebd4babf..00000000000 --- a/src/main/webapp/globus.xhtml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - - - - From caa6e684390bb4c36dff45f1de94837f8b632f57 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:29:22 -0500 Subject: [PATCH 318/546] revert unrelated changes, old settings --- .../harvest/server/web/servlet/OAIServlet.java | 15 ++++++++++----- .../iq/dataverse/settings/JvmSettings.java | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 19901cae796..96a19acc0e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -73,13 +73,18 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; + + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") + private Integer maxListIdentifiers; - //Todo - revert this change - added to get past some local compile issues - private Integer maxListIdentifiers=100; - - private Integer maxListSets=100; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") + private Integer maxListSets; - private Integer maxListRecords=10; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") + private Integer maxListRecords; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 96a56d09c0b..fb85ae9adab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,10 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), - FILES(SCOPE_FILES), - BASE_URL(FILES, "base-url"), - GLOBUS_TOKEN(FILES, "globus-token"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3babc5aac25710dcc92a90ae861a7b21eef43742 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 20:35:56 -0500 Subject: [PATCH 319/546] 
moving the StorageUse member to DvObjectContainer from DvObject; moving the em.merge()/em.persist() to the djb. #8549 --- .../java/edu/harvard/iq/dataverse/DataFile.java | 17 ----------------- .../iq/dataverse/DataverseServiceBean.java | 17 ++++++++++++++++- .../java/edu/harvard/iq/dataverse/DvObject.java | 14 -------------- .../harvard/iq/dataverse/DvObjectContainer.java | 14 ++++++++++++-- .../command/impl/SetCollectionQuotaCommand.java | 15 +-------------- .../storageuse/StorageUseServiceBean.java | 1 - 6 files changed, 29 insertions(+), 49 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 2770118d41b..3d8086b142b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -641,23 +641,6 @@ public String getFriendlySize() { } } - /** - * Experimental - record the pre-calculated "storage size" of the file, and - * all its associated auxiliary file objects: - - @Column(nullable = true) - private Long storageSize; - - - public Long getStorageSize() { - return storageSize; - } - - public void setStorageSize(Long storageSize) { - this.storageSize = storageSize; - } - * */ - public boolean isRestricted() { return restricted; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 549b8310122..487215c7a65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; @@ -919,5 +920,19 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - + public void saveStorageQuota(Dataverse target, Long allocation) { + StorageQuota storageQuota = target.getStorageQuota(); + + if (storageQuota != null) { + storageQuota.setAllocation(allocation); + em.merge(storageQuota); + } else { + storageQuota = new StorageQuota(); + storageQuota.setDefinitionPoint(target); + storageQuota.setAllocation(allocation); + target.setStorageQuota(storageQuota); + em.persist(storageQuota); + } + em.flush(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 515d9f9f153..df249e04663 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; -import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -182,10 +181,6 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments; - /** - * Should only be used in constructors for DvObjectContainers (Datasets and - * Collections), to make sure new entries are created and persisted in the - * database StorageUse table for every DvObject container we create. 
- * @param storageUse - */ - public void setStorageUse(StorageUse storageUse) { - this.storageUse = storageUse; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 2f391e394fa..82057315fbb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -2,11 +2,9 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.persistence.CascadeType; -import java.util.Locale; import java.util.Optional; import jakarta.persistence.MappedSuperclass; @@ -45,6 +43,9 @@ public boolean isEffectivelyPermissionRoot() { private Boolean guestbookAtRequest = null; + @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true) + private StorageUse storageUse; + public String getEffectiveStorageDriverId() { String id = storageDriver; if (StringUtils.isBlank(id)) { @@ -165,4 +166,13 @@ public void setCurationLabelSetName(String setName) { this.externalLabelSetName = setName; } + /** + * Should only be used in constructors for DvObjectContainers (Datasets and + * Collections), to make sure new entries are created and persisted in the + * database StorageUse table for every DvObject container we create. + * @param storageUse + */ + public void setStorageUse(StorageUse storageUse) { + this.storageUse = storageUse; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index cf8fb6fd42e..e52c47a5e7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; @@ -49,18 +48,6 @@ public void executeImpl(CommandContext ctxt) throws CommandException { throw new IllegalCommandException("Must specify valid allocation in bytes", this); } - StorageQuota storageQuota = dataverse.getStorageQuota(); - - if (storageQuota != null) { - storageQuota.setAllocation(allocation); - ctxt.em().merge(storageQuota); - } else { - storageQuota = new StorageQuota(); - storageQuota.setDefinitionPoint(dataverse); - storageQuota.setAllocation(allocation); - dataverse.setStorageQuota(storageQuota); - ctxt.em().persist(storageQuota); - } - ctxt.em().flush(); + ctxt.dataverses().saveStorageQuota(dataverse, allocation); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index 18e4ef49640..fbaaff22dee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,6 +1,5 @@ package 
edu.harvard.iq.dataverse.storageuse; -import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; From dfa2dc3853254bc8c58bedbfd288a63bcfa07b32 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 04:38:46 -0500 Subject: [PATCH 320/546] remove adaptation for quotas PR that was itself changed --- .../impl/CreateNewDataFilesCommand.java | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 269ba47643b..0470f59b861 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,20 +3,18 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -85,7 +83,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; - try { - sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); - - // get file size - // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call - // to get the size immediately after uploading may fail. As of the addition of - // PR#9409 adding storage quotas, we are now requiring size to be available - // earlier. If this is seen, adding - // a delay/retry may help - newFileSize = sio.retrieveSizeFromMedia(); - } catch (IOException e) { - // If we don't get a file size, a CommandExecutionException will be thrown later in the code - e.printStackTrace(); - } - } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From c78613e60ca7a2442753d6382b0ace3c7fd07316 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 08:42:23 -0500 Subject: [PATCH 321/546] one more refinement for the flyway script. 
#8549 --- .../storageuse/StorageUseServiceBean.java | 33 ++++++++++--------- .../V6.0.0.5__8549-collection-quotas.sql | 13 ++++++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index fbaaff22dee..7aea7a7b596 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -46,23 +46,24 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { - //@todo should throw exceptions if either parameter is null - Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); - if (!(allow.isPresent() && allow.get())) { - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; + if (dvObjectContainerId != null && increment != null) { + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? diff --git a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql index 3657642c267..d6c067056ec 100644 --- a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql +++ b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql @@ -38,6 +38,19 @@ AND fileobject.id = file.id AND dt.datafile_id = file.id GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; +-- there may also be some auxiliary files registered in the database, such as +-- the content generated and deposited by external tools - diff. privacy stats +-- being one of the example. These are also considered the "payload" files that +-- we want to count for the purposes of calculating storage use. 
+UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(aux.fileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, auxiliaryFile aux +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND aux.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + + -- ... and then we can repeat the same for collections, by setting the storage size -- to the sum of the storage sizes of the datasets *directly* in each collection: -- (no attemp is made yet to recursively count the sizes all the chilld sub-collections) From 0c02b15aab711acbfb7f2c957c4482313b3997b9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 5 Dec 2023 09:50:33 -0500 Subject: [PATCH 322/546] try QDR /logo endpoint --- .../edu/harvard/iq/dataverse/api/Datasets.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..828ba218cc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1971,6 +1971,22 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { } } + @GET + @Produces({ "image/png" }) + @Path("{id}/logo") + public Response getDatasetLogo(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + InputStream is = DatasetUtil.getLogoAsInputStream(dataset); + if (is == null) { + return notFound("Logo not available"); + } + return Response.ok(is).build(); + } catch (WrappedResponse wr) { + return notFound("Logo not available"); + } + } + // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers. 
@POST @AuthRequired From 8c9f1242d53aea5ecc906bd4a2a3f5d12a884224 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:13:53 -0500 Subject: [PATCH 323/546] switch minio to creds jenkins expects #6783 --- docker-compose-dev.yml | 9 ++++----- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6bc50f7e764..98376e255dd 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -41,8 +41,8 @@ services: -Ddataverse.files.minio1.path-style-access=true -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=minioadmin - -Ddataverse.files.minio1.secret-key=minioadmin + -Ddataverse.files.minio1.access-key=4cc355_k3y + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -211,9 +211,8 @@ services: volumes: - minio_storage:/data environment: - # these are the defaults but are here for clarity - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin + MINIO_ROOT_USER: 4cc355_k3y + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f5e4ce6a794..daf04bb3d14 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,8 +54,8 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyLocalStack, secretKeyLocalStack))) .withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); - String accessKeyMinio = "minioadmin"; - String secretKeyMinio = "minioadmin"; + String accessKeyMinio = "4cc355_k3y"; + String secretKeyMinio = "s3cr3t_4cc355_k35"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From 6a7d8d1c6f76c8e54f9759f643204aa339c5bdd0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:33:19 -0500 Subject: [PATCH 324/546] make assertions about users #6783 --- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index daf04bb3d14..7c1531cbfaf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -99,9 +99,10 @@ public void testNonDirectUpload() { String driverLabel = "MinIO"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
@@ -118,6 +119,7 @@ public void testNonDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); @@ -208,9 +210,10 @@ public void testDirectUpload() { String driverId = "localstack1"; String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? @@ -227,6 +230,7 @@ public void testDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); From b9f48913e498ec96ef8f5994c21e7bb549e747e0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:41:45 -0500 Subject: [PATCH 325/546] move methods to UtilIT #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 75 +++---------------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 50 +++++++++++++ 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7c1531cbfaf..1306c30d9c1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -103,7 +103,7 @@ public void testNonDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
String drivers = """ @@ -127,18 +127,18 @@ public void testNonDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -214,7 +214,7 @@ public void testDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? String drivers = """ @@ -238,18 +238,18 @@ public void testDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -275,7 +275,7 @@ public void testDirectUpload() { // // String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id"); long size = 1000000000l; - Response getUploadUrls = getUploadUrls(datasetPid, size, apiToken); + Response getUploadUrls = UtilIT.getUploadUrls(datasetPid, size, apiToken); getUploadUrls.prettyPrint(); getUploadUrls.then().assertThat().statusCode(200); @@ -298,7 +298,7 @@ public void testDirectUpload() { String contentsOfFile = "foobar"; InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); - Response uploadFileDirect = uploadFileDirect(localhostUrl, inputStream); + Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); /* Direct upload to MinIO is failing with errors like this: @@ -357,7 +357,7 @@ 
public void testDirectUpload() { assertEquals(contentsOfFile, s3Object); System.out.println("direct download..."); - Response getHeaders = downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); + Response getHeaders = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); for (Header header : getHeaders.getHeaders()) { System.out.println("direct download header: " + header); } @@ -371,7 +371,7 @@ public void testDirectUpload() { } catch (UnsupportedEncodingException ex) { } - Response downloadFile = downloadFromUrl(decodedDownloadUrl); + Response downloadFile = UtilIT.downloadFromUrl(decodedDownloadUrl); downloadFile.prettyPrint(); downloadFile.then().assertThat().statusCode(200); @@ -394,55 +394,4 @@ public void testDirectUpload() { } - //TODO: move these into UtilIT. They are here for now to avoid merge conflicts - static Response listStorageDrivers(String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/storageDrivers"); - } - - static Response getStorageDriver(String dvAlias, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response setStorageDriver(String dvAlias, String label, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .body(label) - .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { - String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. - String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. - if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { - idInPath = ":persistentId"; - optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; - } - RequestSpecification requestSpecification = given(); - if (apiToken != null) { - requestSpecification = given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); - } - return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); - } - - static Response uploadFileDirect(String url, InputStream inputStream) { - return given() - .header("x-amz-tagging", "dv-state=temp") - .body(inputStream) - .put(url); - } - - static Response downloadFileNoRedirect(Integer fileId, String apiToken) { - return given().when().redirects().follow(false) - .get("/api/access/datafile/" + fileId + "?key=" + apiToken); - } - - static Response downloadFromUrl(String url) { - return given().get(url); - } - } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..12bb069424f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2361,6 +2361,56 @@ static Response deleteStorageSite(long storageSiteId) { .delete("/api/admin/storageSites/" + storageSiteId); } + static Response listStorageDrivers(String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/storageDrivers"); + } + + static Response getStorageDriver(String dvAlias, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response setStorageDriver(String dvAlias, String label, String apiToken) { + return given() + 
.header(API_TOKEN_HTTP_HEADER, apiToken) + .body(label) + .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); + } + + static Response uploadFileDirect(String url, InputStream inputStream) { + return given() + .header("x-amz-tagging", "dv-state=temp") + .body(inputStream) + .put(url); + } + + static Response downloadFileNoRedirect(Integer fileId, String apiToken) { + return given().when().redirects().follow(false) + .get("/api/access/datafile/" + fileId + "?key=" + apiToken); + } + + static Response downloadFromUrl(String url) { + return given().get(url); + } + static Response metricsDataversesToMonth(String yyyymm, String queryParams) { String optionalYyyyMm = ""; if (yyyymm != null) { From 7349ed9f754e05ff7b16a24ea8f3c24c060ed593 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 10:43:38 -0500 Subject: [PATCH 326/546] get logo, picking 48px size for datafile thumbs FWIW: QDR generates a 400px version here and then uses styling to fit the page. Not sure what the motivation for that was without digging. --- .../iq/dataverse/dataset/DatasetUtil.java | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 096f1f87acc..ccf861ebdc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -411,6 +411,69 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { return nonDefaultDatasetThumbnail; } } + + public static InputStream getLogoAsInputStream(Dataset dataset) { + if (dataset == null) { + return null; + } + StorageIO dataAccess = null; + + try { + dataAccess = DataAccess.getStorageIO(dataset); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + } + + InputStream in = null; + try { + if (dataAccess == null) { + logger.warning( + "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()); + } else { + in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal); + } + } catch (IOException ex) { + logger.fine( + "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail."); + } + + if (in == null) { + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + if (dataset.isUseGenericThumbnail()) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'."); + return null; + } else { + thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + 
logger.fine("Dataset (id :" + dataset.getId() + + ") does not have a logo available that could be selected automatically."); + return null; + } else { + + } + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + + try { + in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), + ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE).getInputStream(); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + ioex.printStackTrace(); + } + + } + return in; + } /** * The dataset logo is the file that a user uploads which is *not* one of From 6f1cd087624fea70a1c37425aacaf05c9d7ba0bf Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 5 Dec 2023 15:53:21 +0000 Subject: [PATCH 327/546] Added: checks before calling getFileMetadatas on canDownloadAtLeastOneFile method in PermissionServiceBean --- .../iq/dataverse/PermissionServiceBean.java | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 9e6628617ce..2e4627576c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -41,6 +41,9 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import jakarta.persistence.Query; +import jakarta.persistence.criteria.CriteriaBuilder; +import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.Root; /** * Your one-stop-shop for deciding which user can do what action on which @@ -837,12 +840,56 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } - public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + /** + * Checks if a User can download at least one file of the target DatasetVersion. + * + * @param user User to check + * @param datasetVersion DatasetVersion to check + * @return boolean indicating whether the user can download at least one file or not + */ + public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { + if (user.isSuperuser()) { + return true; + } + if (hasReleasedFiles(datasetVersion)) { + return true; + } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { return true; } } return false; } + + /** + * Checks if a DatasetVersion has released files. + * + * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing + * the search on a particular file, it searches for the total number of files in the target version that are present + * in the released version. 
+ * + * @param targetDatasetVersion DatasetVersion to check + * @return boolean indicating whether the dataset version has released files or not + */ + private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + Dataset targetDataset = targetDatasetVersion.getDataset(); + if (!targetDataset.isReleased()) { + return false; + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false), + criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset), + criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED), + fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return result > 0; + } } From c194d74b2029917de050fe5d40b237b23bddf3ab Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 10:59:46 -0500 Subject: [PATCH 328/546] Clarified the sentence about the initial deployment in the release note. #8549 --- doc/release-notes/8549-collection-quotas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md index 29b84213cfb..b3635d0c5a1 100644 --- a/doc/release-notes/8549-collection-quotas.md +++ b/doc/release-notes/8549-collection-quotas.md @@ -1,3 +1,3 @@ This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the deployment. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 From cf7e664e626994419ca3a1c80785290da7efe683 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 12:02:41 -0500 Subject: [PATCH 329/546] moved the entitymanager calls from a command to the service #8549 --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 8 ++++++++ .../engine/command/impl/DeleteCollectionQuotaCommand.java | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 487215c7a65..b6e666e8058 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -935,4 +935,12 @@ public void saveStorageQuota(Dataverse target, Long allocation) { } em.flush(); } + + public void disableStorageQuota(StorageQuota storageQuota) { + if (storageQuota != null && storageQuota.getAllocation() != null) { + storageQuota.setAllocation(null); + em.merge(storageQuota); + em.flush(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 4015228366b..c0f863686da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -46,9 +46,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { StorageQuota storageQuota = targetDataverse.getStorageQuota(); if (storageQuota != null && storageQuota.getAllocation() != null) { - storageQuota.setAllocation(null); - ctxt.em().merge(storageQuota); - ctxt.em().flush(); + ctxt.dataverses().disableStorageQuota(storageQuota); } // ... and if no quota was enabled on the collection - nothing to do = success } From 6a4a9ab3d625f1e5835b3e119449f8fd88eaee23 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 12:10:48 -0500 Subject: [PATCH 330/546] stub out diagnosing jenkins failures #10101 --- doc/sphinx-guides/source/qa/jenkins.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md index a4ca4d8688f..9259284beb9 100644 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -42,3 +42,18 @@ How can you know if API tests are passing? Here are the steps, by way of example - Click "Test Result". - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. + +## Diagnosing Failures + +API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. + +If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. 
+ +Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: + +``` +TASK [dataverse : download payara zip] ***************************************** +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +``` + +In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! From dfa49c3720f866f36df0b6cd712f1c5144dfee44 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 14:31:32 -0500 Subject: [PATCH 331/546] rename flyway script --- ...thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.5__9506-track-thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql rename to src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql From 70a3442cc9a6c672ef8a553be8b279b3b8ea1b52 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 14:36:21 -0500 Subject: [PATCH 332/546] updated aux. file service bean #8549 --- .../dataverse/AuxiliaryFileServiceBean.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 8c96f98ce39..363622ba3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,6 +2,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @EJB private SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } dataFile.getAuxiliaryFiles().add(auxFile); } + // We've just added this file to storage; increment the StorageUse + // record if needed. 
+ if (auxFile.getFileSize() != null + && auxFile.getFileSize() > 0 + && dataFile.getOwner() != null ) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize()); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (af == null) { throw new FileNotFoundException(); } + Long auxFileSize = af.getFileSize(); em.remove(af); StorageIO storageIO; storageIO = dataFile.getStorageIO(); @@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (storageIO.isAuxObjectCached(auxExtension)) { storageIO.deleteAuxObject(auxExtension); } + // We've just deleted this file from storage; update the StorageUse + // record if needed. + if (auxFileSize != null + && auxFileSize > 0 + && dataFile.getOwner() != null) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize)); + } + } public List findAuxiliaryFiles(DataFile dataFile) { From c54a85fca9377b74efc0e74e8a70a6de2f6fccc4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 14:52:23 -0500 Subject: [PATCH 333/546] #9464 add caveats to release note. --- doc/release-notes/9464-json-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md index 4b08f2ca9dd..f104263ba35 100644 --- a/doc/release-notes/9464-json-validation.md +++ b/doc/release-notes/9464-json-validation.md @@ -1,3 +1,3 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 2379828c2737260901b23020a436f5cab6cc962a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 15:05:12 -0500 Subject: [PATCH 334/546] Update native-api.rst --- doc/sphinx-guides/source/api/native-api.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2d37c3b07ae..29aa7c880ac 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -510,7 +510,9 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This +first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled +vocabulary and more robust dataset field format testing: .. code-block:: bash @@ -535,7 +537,8 @@ While it is recommended to download a copy of the JSON Schema from the collectio Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a dataset JSON file customized for a given collection prior to creating the dataset: +Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting +and the presence of required elements: .. code-block:: bash From dd2d9726e3125975493fa6dbf70578d76fa5f07c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:47:04 -0500 Subject: [PATCH 335/546] globus store options --- .../source/installation/config.rst | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7b32da8f6c3..e0e4d4cd89e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -499,14 +499,14 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ------------------------------------------------------------------------------------------------------ +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores +-------------------------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. -A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. 
+A Dataverse installation can alternately store files in a Swift or S3-compatible object store, or on a Globus endpoint, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web or Globus accessible trusted remote store. A Dataverse installation can be configured to allow out of band upload by setting the ``dataverse.files.\.upload-out-of-band`` JVM option to ``true``. By default, Dataverse supports uploading files via the :ref:`add-file-api`. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -958,7 +958,7 @@ Once you have configured a trusted remote store, you can point your users to the dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 @@ -967,6 +967,46 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _globus-storage: + +Globus Storage +++++++++++++++ + +Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). +See :doc:`/developers/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. + +In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. 
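+
+As a minimal, illustrative sketch (the store id ``globus1``, the label, and the endpoint value below are placeholders; these and the remaining options are explained in the table and the paragraphs that follow), a Globus store is declared with per-store JVM options in the same way as the other store types:
+
+.. code-block:: bash
+
+    # A sketch only - adjust the store id and values for your installation
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.type=globus"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.label=GlobusData"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.base-store=file"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.managed=true"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.transfer-endpoint-with-basepath=<endpoint-id>/<base-path>"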
+ +There are two types of Globus stores +- managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse +- remote - where Dataverse references files that remain on trusted remote Globus endpoints + +For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector. +With the former, Dataverse has no direct access to the file contents and functionality related to ingest, fixity hash validation, etc. are not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. + +Once you have configured a globus store, it is recommended that you install the `dataverse-globus app `_ to allow transfers in/out of Dataverse to be initated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support. + +.. table:: + :align: left + + ======================================================= ================== ========================================================================== =================== + JVM Option Value Description Default value + ======================================================= ================== ========================================================================== =================== + dataverse.files..type ``globus`` **Required** to mark this storage as globus enabled. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + dataverse.files..managed ``true``/``false`` Whether dataverse manages an associated Globus endpoint ``false`` + dataverse.files..transfer-endpoint-with-basepath The *managed* Globus endpoint id and associated base path for file storage (none) + dataverse.files..globus-token A Globus token (base64 endcoded : + for a managed store) - using a microprofile alias is recommended (none) + dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + + ======================================================= ================== ========================================================================== =================== + .. 
_temporary-file-storage: Temporary Upload File Storage From 4d7818a7be615033bd00261a6a0951c703c0ad3b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:04 -0500 Subject: [PATCH 336/546] merge miss --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 13ec049fa0a..8afc365417e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -87,7 +87,7 @@ /* Amazon AWS S3 driver */ -public class S3AccessIO extends StorageIO implements GlobusAccessibleStore { +public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); @@ -1194,7 +1194,6 @@ private static AmazonS3 getClient(String driverId) { * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = getConfigParamForDriver(driverId, PROFILE,"default"); ArrayList providers = new ArrayList<>(); From ceacf7e92c045a61b96205536f442dc48142cb2a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:40 -0500 Subject: [PATCH 337/546] add a stub globus api page since it is referenced in the config doc --- .../source/developers/globus-api.rst | 282 ++++++++++++++++++ doc/sphinx-guides/source/developers/index.rst | 1 + 2 files changed, 283 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/globus-api.rst diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst new file mode 100644 index 00000000000..2775ffd2142 --- /dev/null +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -0,0 +1,282 @@ +Globus Transfer API +=================== + +The Globus API addresses three use cases: +* Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) +* Reference of files that will remain in a remote Globus endpoint +* Transfer from a Dataverse-managed Globus endpoint + +The ability for Dataverse to interact with Globus endpoints is configured via +Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: + +* Requesting initiation of a transfer from the server +* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 +* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer + +This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. +Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) 
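+
+As a rough end-to-end sketch (all ids, URLs, and tokens below are placeholders; the exact calls, parameters, and responses are documented in the sections that follow), the three activities look like this:
+
+.. code-block:: bash
+
+    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+    export SERVER_URL=https://demo.dataverse.org
+    export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
+
+    # 1. Request upload URL(s) for a file of a given size
+    curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=1000000000"
+
+    # 2. PUT the file bytes to the pre-signed URL returned in step 1 (single-part case)
+    curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T file1.txt "<url from step 1>"
+
+    # 3. Register the uploaded file with the dataset (or abort the transfer), as described below
+    curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=<see below>"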
+ + +Requesting Direct Upload of a DataFile +-------------------------------------- +To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export SIZE=1000000000 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" + +The response to this call, assuming direct uploads are enabled, will be one of two forms: + +Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "url":"...", + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + } + } + +Multiple URLs: when the file must be uploaded in multiple parts. The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "urls":{ + "1":"...", + "2":"...", + "3":"...", + "4":"...", + "5":"..." + } + "abort":"/api/datasets/mpupload?...", + "complete":"/api/datasets/mpupload?..." + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" + } + +In the example responses above, the URLs, which are very long, have been omitted. These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... + +The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + +In the single part case, only one call to the supplied URL is required: + +.. code-block:: bash + + curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" + + +In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. +The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. + +To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: + +.. code-block:: bash + + curl -X PUT "$SERVER_URL/api/datasets/mpload?..." -d '{"1":"","2":"","3":"","4":"","5":""}' + +If the client is unable to complete the multipart upload, it should call the abort URL: + +.. code-block:: bash + + curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." + + +.. _direct-add-to-dataset-api: + +Adding the Uploaded file to the Dataset +--------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must also include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +To add multiple Uploaded Files to the Dataset +--------------------------------------------- + +Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: + +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + + +Replacing an existing file in the Dataset +----------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. +Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_IDENTIFIER=5072 + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + +.. code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
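The curl-based /replaceFiles example above can also be scripted. The following is a minimal, hypothetical Python 3 sketch of the same call; the endpoint path, the X-Dataverse-key header, and the multipart "jsonData" field are taken from the documentation above, while the token, server URL, persistent identifier, and storage identifier are placeholder values, not tested ones.

.. code-block:: python

    # Hypothetical sketch: drive the /replaceFiles call shown above with the
    # Python "requests" library instead of curl. All credential and identifier
    # values are placeholders copied from the examples above.
    import json
    import requests

    API_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
    SERVER_URL = "https://demo.dataverse.org"
    PERSISTENT_IDENTIFIER = "doi:10.5072/FK2/7U7YBV"

    json_data = [
        {
            "fileToReplaceId": 10,
            "description": "My description.",
            "directoryLabel": "data/subdir1",
            "categories": ["Data"],
            "restrict": "false",
            "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42",
            "fileName": "file1.txt",
            "mimeType": "text/plain",
            "checksum": {"@type": "SHA-1", "@value": "123456"},
        }
    ]

    response = requests.post(
        f"{SERVER_URL}/api/datasets/:persistentId/replaceFiles",
        params={"persistentId": PERSISTENT_IDENTIFIER},
        headers={"X-Dataverse-key": API_TOKEN},
        # Send jsonData as a multipart form field, matching "-F jsonData=..." in curl.
        files={"jsonData": (None, json.dumps(json_data))},
    )
    print(response.status_code, response.text)

As with the curl version, the returned JSON should be inspected for the per-file success or error messages illustrated in the sample response above.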
diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 60d97feeef9..458a78a6c95 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -39,6 +39,7 @@ Developer Guide big-data-support aux-file-support s3-direct-upload-api + globus-api dataset-semantic-metadata-api dataset-migration-api workflows From 03a4c77155934060c33c33ed27ea2f7628301e91 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:58:33 +0000 Subject: [PATCH 338/546] Refactor: shortcut on datafile permission check --- .../harvard/iq/dataverse/PermissionServiceBean.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 2e4627576c6..107024bcfb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -851,11 +851,13 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio if (user.isSuperuser()) { return true; } - if (hasReleasedFiles(datasetVersion)) { + if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + DataFile dataFile = fileMetadata.getDataFile(); + Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } @@ -863,7 +865,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio } /** - * Checks if a DatasetVersion has released files. + * Checks if a DatasetVersion has unrestricted released files. 
* * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing * the search on a particular file, it searches for the total number of files in the target version that are present @@ -872,7 +874,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio * @param targetDatasetVersion DatasetVersion to check * @return boolean indicating whether the dataset version has released files or not */ - private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) { Dataset targetDataset = targetDatasetVersion.getDataset(); if (!targetDataset.isReleased()) { return false; From 326b784da752091bf4c7b3bf4112ebfc327acb69 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:59:08 +0000 Subject: [PATCH 339/546] Refactor: variable extracted in isPublicallyDownloadable --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 107024bcfb9..1c568e83143 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -451,8 +451,9 @@ private boolean isPublicallyDownloadable(DvObject dvo) { if (!df.isRestricted()) { if (df.getOwner().getReleasedVersion() != null) { - if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) { - for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) { + List fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas(); + if (fileMetadatas != null) { + for (FileMetadata fm : fileMetadatas) { if (df.equals(fm.getDataFile())) { return true; } From 16c685dc30601d8a8b0140cec4b8621e1fe33a99 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:06 +0000 Subject: [PATCH 340/546] Changed: passing DataverseRequest instead of User to canDownloadAtLeastOneFile --- .../harvard/iq/dataverse/PermissionServiceBean.java | 11 ++++++----- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 1c568e83143..e87809ada56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -844,20 +844,21 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio /** * Checks if a User can download at least one file of the target DatasetVersion. 
* - * @param user User to check + * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check * @return boolean indicating whether the user can download at least one file or not */ - public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { - if (user.isSuperuser()) { + public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { + if (dataverseRequest.getUser().isSuperuser()) { return true; } if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } - for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a9cfefc33d8..6a1e11e690b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4145,7 +4145,7 @@ public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext cr @Context HttpHeaders headers) { return response(req -> { DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); - return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } } From 8ca2338723a0ec1a57a9affc923fe65229009909 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:51 +0000 Subject: [PATCH 341/546] Fixed: method doc --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index e87809ada56..359e8823fce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -842,7 +842,7 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio } /** - * Checks if a User can download at least one file of the target DatasetVersion. + * Checks if a DataverseRequest can download at least one file of the target DatasetVersion. 
* * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check From 96cd5c9d55437180cfa256df38b0d5990c97ec6c Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:24:49 +0000 Subject: [PATCH 342/546] Added: explanatory comment --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 359e8823fce..6dc943f1ca8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -852,6 +852,7 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data if (dataverseRequest.getUser().isSuperuser()) { return true; } + // This is a shortcut to avoid having to check version files if the condition is met if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } From 3c1820b060b303da2bfa97132667ceccb5d5e977 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:48:09 +0000 Subject: [PATCH 343/546] Added: includeDeaccessioned query param to getCanDownloadAtLeastOneFile API endpoint --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 6a1e11e690b..579f4f78fe1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4141,10 +4141,11 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } From 811d79a7f8d017745fcfd782b233ec583d3669e2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 08:33:38 -0500 Subject: [PATCH 344/546] change minio access key, more l33t #6783 --- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 98376e255dd..e68215d53d2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -212,7 +212,7 @@ services: - minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1306c30d9c1..41446349093 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -55,7 +55,7 @@ public static void setUp() { .withEndpointConfiguration(new 
EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k35"; + String secretKeyMinio = "s3cr3t_4cc355_k3y"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From e9a670c8620c068419080aad25421afa04641958 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 10:39:09 -0500 Subject: [PATCH 345/546] collection not DB #10101 --- doc/sphinx-guides/source/qa/performance-tests.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index f433226d4ff..447c4f6c54d 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -20,4 +20,4 @@ Please note the performance database is also used occasionally by Julian and the Executing the Performance Script -------------------------------- -To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the DB and ds and works with python3. +To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset and works with Python 3. From a81ad72a0896073e043ee57848e571d7a3754a8a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 10:50:46 -0500 Subject: [PATCH 346/546] comment out optional listing of buckets #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 41446349093..74150ca120a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -62,16 +62,15 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyMinio, secretKeyMinio))) .withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build(); - System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3localstack.listBuckets()) { - System.out.println("bucket: " + bucket); - } - - System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3minio.listBuckets()) { - System.out.println("bucket: " + bucket); - } - +// System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3localstack.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } +// +// System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3minio.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } // create bucket if it doesn't exist // Note that we create the localstack bucket with conf/localstack/buckets.sh // because we haven't figured out how to create it properly in Java. 
From 0bd9f139e5dca2851ca88ed12c5e31af9c5bbfe9 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:01:04 -0500 Subject: [PATCH 347/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index c2b52ab34b8..06a3e01f7af 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -1,6 +1,6 @@ # Dataverse 6.1 -(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.1 rather than the list of releases, which will cut them off. This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. From c97d7b55e2932dacaa19e4e3ac403c88a25bd2ee Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:01:56 -0500 Subject: [PATCH 348/546] globus api doc --- .../source/developers/globus-api.rst | 348 ++++++++---------- 1 file changed, 149 insertions(+), 199 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2775ffd2142..6a94f220dc2 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -6,277 +6,227 @@ The Globus API addresses three use cases: * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint -The ability for Dataverse to interact with Globus endpoints is configured via -Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: +The ability for Dataverse to interact with Globus endpoints is configured via a Globus store - see :ref:`globus-storage`. -* Requesting initiation of a transfer from the server -* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 -* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer +Globus transfers (or referencing a remote endpoint) for upload and download transfers involve a series of steps. These can be accomplished using the Dataverse and Globus APIs. (These are used internally by the `dataverse-globus app `_ when transfers are done via the Dataverse UI.) -This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. -Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) 
+Requesting Upload or Download Parameters +---------------------------------------- - -Requesting Direct Upload of a DataFile --------------------------------------- -To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. +The first step in preparing for a Globus transfer/reference operation is to request the parameters relevant for a given dataset: .. code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export SIZE=1000000000 - - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" - -The response to this call, assuming direct uploads are enabled, will be one of two forms: + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusUploadParameters?locale=$LOCALE" -Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts +The response will be of the form: .. code-block:: bash { - "status":"OK", - "data":{ - "url":"...", - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + "status": "OK", + "data": { + "queryParameters": { + "datasetId": 29, + "siteUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com", + "datasetVersion": ":draft", + "dvLocale": "en", + "datasetPid": "doi:10.5072/FK2/ILLPXE", + "managed": "true", + "endpoint": "d8c42580-6528-4605-9ad8-116a61982644" + }, + "signedUrls": [ + { + "name": "requestGlobusTransferPaths", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/requestGlobusUploadPaths?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=63ac4bb748d12078dded1074916508e19e6f6b61f64294d38e0b528010b07d48783cf2e975d7a1cb6d4a3c535f209b981c7c6858bc63afdfc0f8ecc8a139b44a", + "timeOut": 300 + }, + { + "name": "addGlobusFiles", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/addGlobusFiles?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=2aaa03f6b9f851a72e112acf584ffc0758ed0cc8d749c5a6f8c20494bb7bc13197ab123e1933f3dde2711f13b347c05e6cec1809a8f0b5484982570198564025", + "timeOut": 300 + }, + { + "name": "getDatasetMetadata", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft?until=2023-11-22T01:52:03.649&user=dataverseAdmin&method=GET&token=1878d6a829cd5540e89c07bdaf647f1bea5314cc7a55433b0b506350dd330cad61ade3714a8ee199a7b464fb3b8cddaea0f32a89ac3bfc4a86cd2ea3004ecbb8", + "timeOut": 300 + }, + { + "name": "getFileListing", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft/files?until=2023-11-22T01:52:03.650&user=dataverseAdmin&method=GET&token=78e8ca8321624f42602af659227998374ef3788d0feb43d696a0e19086e0f2b3b66b96981903a1565e836416c504b6248cd3c6f7c2644566979bd16e23a99622", + "timeOut": 300 + } + ] + } } - } -Multiple URLs: when the file must be uploaded in multiple parts. 
The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB +The response includes the id for the Globus endpoint to use along with several signed URLs. -.. code-block:: bash +The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. - { - "status":"OK", - "data":{ - "urls":{ - "1":"...", - "2":"...", - "3":"...", - "4":"...", - "5":"..." - } - "abort":"/api/datasets/mpupload?...", - "complete":"/api/datasets/mpupload?..." - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" - } +If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the +requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are +describe further below. + +The call to set up for a transfer out (download) is similar: -In the example responses above, the URLs, which are very long, have been omitted. These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... +.. code-block:: bash -The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusDownloadParameters?locale=$LOCALE" -In the single part case, only one call to the supplied URL is required: +Note that this API call supports an additional downloadId query parameter. This is only used when the globus-dataverse app is called from the Dataverse user interface. There is no need to use it when calling the API directly. -.. code-block:: bash +The returned response includes the same getDatasetMetadata and getFileListing URLs as in the upload case and includes "monitorGlobusDownload" and "requestGlobusDownload" URLs. The response will also indicate whether the store is "managed" and will provide the "endpoint" from which downloads can be made. - curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" +Performing an Upload/Transfer In +-------------------------------- -In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. -The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). -To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: +Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. These both reference the same API call but must be used with different entries in the JSON body sent: .. code-block:: bash - curl -X PUT "$SERVER_URL/api/datasets/mpload?..." 
-d '{"1":"","2":"","3":"","4":"","5":""}' - -If the client is unable to complete the multipart upload, it should call the abort URL: + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export LOCALE=en-US + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" -.. code-block:: bash - - curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." - +Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. -.. _direct-add-to-dataset-api: +In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: -Adding the Uploaded file to the Dataset ---------------------------------------- +.. code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "numberOfFiles":2 + } -Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +.. code-block:: bash + { + "referencedFiles":[ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + ] + } + +The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: +.. code-block:: bash - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + { + "status":"OK", + "data":{ + "globusm://18b49d3688c-62137dcb06e4":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-62137dcb06e4", + "globusm://18b49d3688c-5c17d575e820":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-5c17d575e820" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, the specified Globus principal is granted write permission to the specified endpoint/path, +which will allow initiation of a transfer from the external endpoint to the managed endpoint using the Globus API. +The permission will be revoked if the transfer is not started and the next call to Dataverse to finish the transfer are not made within a short time (configurable, default of 5 minutes). + +In the remote/reference case, the map is from the initially supplied endpoint/paths to the new assigned file storageidentifiers: .. 
code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. - -To add multiple Uploaded Files to the Dataset ---------------------------------------------- + { + "status":"OK", + "data":{ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt":"globus://18bf8c933f4-ed2661e7d19b//d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + } + } -Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Adding Files to the Dataset +--------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, once a Globus transfer has been initiated a final API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: .. 
code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. +As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. -Replacing an existing file in the Dataset ------------------------------------------ +Once the transfer completes, Dataverse will remove the write permission for the principal. -Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. 
For direct uploads, the jsonData object must include values for: +Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Downloading/Transfer Out Via Globus +----------------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. -Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. +To begin downloading files, the requestGlobusDownload URL is used: .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export FILE_IDENTIFIER=5072 - export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusDownload" -Replacing multiple existing files in the Dataset ------------------------------------------------- +The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: -Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: +.. 
code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "fileIds":[60, 61] + } + +Note that this API call takes an optional downloadId parameter that is used with the dataverse-globus app. When downloadId is included, the list of fileIds is not needed. -* "fileToReplaceId" - the id of the file being replaced -* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: +The response is a JSON object mapping the requested file Ids to Globus endpoint/paths. In the managed case, the principal will have been given read permissions for the specified paths: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +.. code-block:: bash + { + "status":"OK", + "data":{ + "60": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-92b8e168090e", + "61": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-c8d81569305c" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +For the remote case, the use can perform the transfer without further contact with Dataverse. In the managed case, the user must initiate the transfer via the Globus API and then inform Dataverse. +Dataverse will then monitor the transfer and revoke the read permission when the transfer is complete. (Not making this last call could result in failure of the transfer.) .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/monitorGlobusDownload" + +The JSON body sent just contains the task identifier for the transfer: -.. code-block:: +.. 
code-block:: bash { - "status": "OK", - "data": { - "Files": [ - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", - "fileDetails": { - "fileToReplaceId": 10, - "description": "My description.", - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "restrict": "false", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "fileName": "file1.Bin", - "mimeType": "application/octet-stream", - "checksum": { - "@type": "SHA-1", - "@value": "123456" - } - } - }, - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "successMessage": "Replaced successfully in the dataset", - "fileDetails": { - "description": "My description.", - "label": "file2.txt", - "restricted": false, - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "dataFile": { - "persistentId": "", - "pidURL": "", - "filename": "file2.txt", - "contentType": "text/plain", - "filesize": 2407, - "description": "My description.", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "rootDataFileId": 11, - "previousDataFileId": 11, - "checksum": { - "type": "SHA-1", - "value": "123789" - } - } - } - } - ], - "Result": { - "Total number of files": 2, - "Number of files successfully replaced": 1 - } - } + "taskIdentifier":"b5fd01aa-8963-11ee-83ae-d5484943e99a" } + - -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
From c7d73f64177745fa7892543407025f9130dcb83b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:25:22 -0500 Subject: [PATCH 349/546] default for globus-cache-maxage --- src/main/resources/META-INF/microprofile-config.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 504b5e46735..ec8427795ee 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -18,6 +18,7 @@ dataverse.build= dataverse.files.directory=${STORAGE_DIR:/tmp/dataverse} dataverse.files.uploads=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/uploads dataverse.files.docroot=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/docroot +dataverse.files.globus-cache-maxage=5 # SEARCH INDEX dataverse.solr.host=localhost From 1fb7ddf6d89a1b36f9a059f016ac617aa6ec3758 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:27:42 -0500 Subject: [PATCH 350/546] fix spacing --- doc/sphinx-guides/source/developers/globus-api.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 6a94f220dc2..5b2b6982866 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -105,6 +105,7 @@ Note that when using the dataverse-globus app or the return from the previous ca In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "numberOfFiles":2 @@ -113,6 +114,7 @@ In the managed case, the JSON body sent must include the id of the Globus user t In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: .. code-block:: bash + { "referencedFiles":[ "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" @@ -120,6 +122,7 @@ In the remote reference case, the JSON body sent must include the Globus endpoin } The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: + .. code-block:: bash { @@ -161,7 +164,6 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. 
@@ -191,6 +193,7 @@ To begin downloading files, the requestGlobusDownload URL is used: The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "fileIds":[60, 61] From c2ad0092c545a41f071129bcd85c398775a53a1e Mon Sep 17 00:00:00 2001 From: sbondka Date: Wed, 6 Dec 2023 17:28:40 +0100 Subject: [PATCH 351/546] Add modifications --- .../source/_static/admin/dataverse-external-tools.tsv | 1 + doc/sphinx-guides/source/admin/integrations.rst | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 4f4c29d0670..ba60be59227 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,3 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. +JupyterHub explore file The `Dataverse-to-JupyterHub Data Transfer Connector `_ is a tool that simplifies the transfer of data between Dataverse repositories and the cloud-based platform JupyterHub. It is designed for researchers, scientists, and data analysts, facilitating collaboration on projects by seamlessly moving datasets and files. The tool is a lightweight client-side web application built using React and relies on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems. Currently optimized for small to medium-sized files, future plans include extending support for larger files and signed Dataverse endpoints. 
For more details, you can refer to the external tool manifest: https://forgemia.inra.fr/dipso/eosc-pillar/dataverse-jupyterhub-connector/-/blob/master/externalTools.json
diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst
index a9b962f33ca..ed3860a9ca1 100644
--- a/doc/sphinx-guides/source/admin/integrations.rst
+++ b/doc/sphinx-guides/source/admin/integrations.rst
@@ -188,12 +188,12 @@ Researchers can use a Google Sheets add-on to search for Dataverse installation'
 JupyterHub
 ++++++++++
 
-The Dataverse-to-JupyterHub Data Transfer Connector streamlines data transfer between Dataverse repositories and the cloud-based platform JupyterHub, enhancing collaborative research.
+The `Dataverse-to-JupyterHub Data Transfer Connector `_ streamlines data transfer between Dataverse repositories and the cloud-based platform JupyterHub, enhancing collaborative research.
 This connector facilitates seamless two-way transfer of datasets and files, emphasizing the potential of an integrated research environment.
 It is a lightweight client-side web application built using React and relying on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems.
 Currently, it supports small to medium-sized files, with plans to enable support for large files and signed Dataverse endpoints in the future.
 
 What kind of user is the feature intended for?
-The feature is intended for reasearchers, scientists and data analyst working with Dataverse instances and JupyterHub looking to ease the data transfer process.
+The feature is intended for researchers, scientists, and data analysts who work with Dataverse instances and JupyterHub and want to ease the data transfer process.
 
 .. _integrations-discovery:

From a9a8f0cadec9bc3b31f0546805c46cdbf578aef1 Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Wed, 6 Dec 2023 11:37:06 -0500
Subject: [PATCH 352/546] clarify it's pages we're hitting #10101

---
 doc/sphinx-guides/source/qa/performance-tests.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md
index 447c4f6c54d..ad7972bd75e 100644
--- a/doc/sphinx-guides/source/qa/performance-tests.md
+++ b/doc/sphinx-guides/source/qa/performance-tests.md
@@ -20,4 +20,4 @@ Please note the performance database is also used occasionally by Julian and the
 Executing the Performance Script
 --------------------------------
 
-To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset and works with Python 3.
+To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset pages and works with Python 3.
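The stripped-down performance script mentioned in the patch above is not included in this document, so the following is only a hypothetical Python 3 sketch of the idea it describes: timing requests to a collection page and a dataset page. The base URL, collection alias, and dataset PID are assumed placeholder values, and the real helper script may be structured differently.

```python
# Hypothetical sketch of a stripped-down page-timing check: fetch a collection page
# and a dataset page and report how long each request took. The URLs below are
# placeholders; the real helper script may differ.
import time
import requests

BASE_URL = "https://demo.dataverse.org"  # assumed target installation
PAGES = {
    "collection page": f"{BASE_URL}/dataverse/root",  # assumed collection alias
    "dataset page": f"{BASE_URL}/dataset.xhtml?persistentId=doi:10.5072/FK2/EXAMPLE",  # assumed PID
}

for name, url in PAGES.items():
    start = time.perf_counter()
    response = requests.get(url, timeout=60)
    elapsed = time.perf_counter() - start
    print(f"{name}: HTTP {response.status_code} in {elapsed:.2f}s, {len(response.content)} bytes")
```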
From 6fee16dec8125390ea6aa7221a19fde0db2b9730 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 6 Dec 2023 11:52:24 -0500 Subject: [PATCH 353/546] #10151 incorporate json schema --- doc/release-notes/6.1-release-notes.md | 6 +++++- doc/release-notes/9464-json-validation.md | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 doc/release-notes/9464-json-validation.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 06a3e01f7af..990ba219cad 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -24,7 +24,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] +For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -45,6 +45,8 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. +- getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -112,6 +114,8 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. 
In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) + ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md deleted file mode 100644 index f104263ba35..00000000000 --- a/doc/release-notes/9464-json-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) - -For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 15e80aa4c847cb5ce8574fe600723c9cc81a5bc2 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 16:56:37 +0000 Subject: [PATCH 354/546] Fixed: roleAssignees setup in canDownloadAtLeastOneFile --- .../edu/harvard/iq/dataverse/PermissionServiceBean.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 6dc943f1ca8..471cac31e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -849,7 +849,8 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio * @return boolean indicating whether the user can download at least one file or not */ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { - if (dataverseRequest.getUser().isSuperuser()) { + User user = dataverseRequest.getUser(); + if (user.isSuperuser()) { return true; } // This is a shortcut to avoid having to check version files if the condition is met @@ -859,8 +860,9 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data List fileMetadatas = datasetVersion.getFileMetadatas(); for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); - if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { + Set roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); + roleAssignees.add(user); + if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } From 4b71b36305fb6c18f7282530dc4491976a352936 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 17:02:07 +0000 Subject: [PATCH 355/546] Added: IT for getCanDownloadAtLeastOneFile endpoint --- .../harvard/iq/dataverse/api/DatasetsIT.java | 71 +++++++++++++++---- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 945b741a94b..3510f2c06ef 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -80,7 +80,6 @@ import javax.xml.stream.XMLStreamReader; import static java.lang.Thread.sleep; -import static org.junit.jupiter.api.Assertions.assertEquals; import org.hamcrest.CoreMatchers; @@ -90,11 +89,7 @@ import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.contains; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; public class DatasetsIT { @@ -4123,10 +4118,10 @@ public void testGetUserPermissionsOnDataset() { } @Test - public void testGetCanDownloadAtLeastOneFile() { - Response createUser = UtilIT.createRandomUser(); - createUser.then().assertThat().statusCode(OK.getStatusCode()); - String apiToken = UtilIT.getApiTokenFromResponse(createUser); + public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + Response createUserResponse = UtilIT.createRandomUser(); + createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); @@ -4135,15 +4130,65 @@ public void testGetCanDownloadAtLeastOneFile() { Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPersistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); - // Call with valid dataset id - Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + // Upload file + String pathToTestFile = "src/test/resources/images/coffeeshop.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToTestFile, Json.createObjectBuilder().build(), apiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id"); + + // Publish dataset version + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Create a second user to call the getCanDownloadAtLeastOneFile method + Response createSecondUserResponse = UtilIT.createRandomUser(); + createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String secondUserApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + String 
secondUserUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); + + // Call with a valid dataset id when a file is released + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); assertTrue(canDownloadAtLeastOneFile); + // Restrict file + Response restrictFileResponse = UtilIT.restrictFile(fileId, true, apiToken); + restrictFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Publish dataset version + publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Call with a valid dataset id when a file is restricted and the user does not have access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertFalse(canDownloadAtLeastOneFile); + + // Grant restricted file access to the user + Response grantFileAccessResponse = UtilIT.grantFileAccess(fileId, "@" + secondUserUsername, apiToken); + grantFileAccessResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Call with a valid dataset id when a file is restricted and the user has access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + // Call with invalid dataset id - Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, secondUserApiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } } From 6d2f87ca93c108a9b4ec4905372a2e1709b3f5cf Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:24:26 -0500 Subject: [PATCH 356/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 990ba219cad..4b5c20f3953 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -12,8 +12,8 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios ### Dataverse installation can be now be configured to allow out-of-band upload -- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. 
-By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### Alternative Title is made repeatable. @@ -23,7 +23,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. +Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API @@ -70,7 +70,6 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. - ### DataFile API payload has been extended to include the following fields: - tabularData: Boolean field to know if the DataFile is of tabular type - fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) @@ -114,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. 
There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -125,12 +124,13 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. 
## OpenID Connect Authentication Provider Improvements @@ -175,6 +175,8 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.subtitle - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age +- dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.guestbook-at-request ## Installation @@ -182,14 +184,17 @@ If this is a new installation, please follow our [Installation Guide](https://gu Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! -You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). ## Upgrade Instructions - Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +## Backward Incompatibilities +- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, + ## Complete List of Changes For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. From 90ff56ca979cd71f1c467ff1cfa0dfeb8f619691 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:43:43 -0500 Subject: [PATCH 357/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 4b5c20f3953..e1a9214a982 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -122,7 +122,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. 
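To make the two JSON schema endpoints described in the release note hunks above more concrete, here is a rough sketch of how they might be called. The endpoint paths come from the notes themselves; the HTTP verbs, the X-Dataverse-key header, and the shell variables are assumptions based on the usual Dataverse API conventions:

    # Fetch the dataset schema derived from the collection's required fields
    curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ALIAS/datasetSchema"

    # Validate a local dataset.json file against that schema
    curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H 'Content-type: application/json' \
         --upload-file dataset.json \
         "$SERVER_URL/api/dataverses/$ALIAS/validateDatasetJson"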
From 10e0e25fe10dda9f49b6126f591b9483adb2f765 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:44:49 -0500 Subject: [PATCH 358/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index e1a9214a982..427a07a4c2c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -123,7 +123,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. From 3d55ed31de8fb9e45a2cedfecf07e22c82dae12a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:47:53 -0500 Subject: [PATCH 359/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 427a07a4c2c..189f21f2322 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -19,21 +19,22 @@ With the upload-out-of-band option enabled, it is also possible for file upload ### Alternative Title is made repeatable. - One will need to update database with updated citation block. `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` - Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +- One will also need to update Solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` + Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. 
+Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] -### Improvements in the /versions API +### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. -### The following API endpoints have been added: +### The following API endpoints have been added: +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - /api/files/{id}/downloadCount - /api/files/{id}/dataTables - /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. @@ -42,11 +43,10 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. - userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. -- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. 
Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -113,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -192,7 +192,7 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1be5d4b6b2baddc5f30bf598d81bd5ed991f73ee Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:52:39 -0500 Subject: [PATCH 360/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 189f21f2322..d0fe895565c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -87,10 +87,8 @@ This parameter applies a filter criteria to the operation and supports the follo ### Misc - Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. - - Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). 
The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. - - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -104,7 +102,7 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. - +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information @@ -124,7 +122,6 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
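The guestbook-at-request behavior and the out-of-band upload option referenced in the release note hunks above are both plain JVM options. A minimal sketch of enabling them on a Payara installation, assuming a store with driver id "mystore"; the driver id, asadmin path, and chosen values are illustrative, not part of the patches:

    # Show any configured guestbook when access is requested instead of at download time
    $PAYARA/bin/asadmin create-jvm-options '-Ddataverse.files.guestbook-at-request=true'
    # Allow out-of-band (API-only) upload for the store with driver id "mystore"
    $PAYARA/bin/asadmin create-jvm-options '-Ddataverse.files.mystore.upload-out-of-band=true'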
From 8e2ff826bdd0f41e598a56012fa780d5f9148a2e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:41:35 -0500 Subject: [PATCH 361/546] store tests --- .../dataaccess/GlobusOverlayAccessIOTest.java | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index e69de29bb2d..792a9974076 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import java.io.IOException; +import java.nio.file.Paths; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +public class GlobusOverlayAccessIOTest { + + @Mock + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + // Base Store + System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.base.label", "default"); + System.setProperty("dataverse.files.base.directory", "/tmp/files"); + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + // Nonsense value of the right form + System.setProperty("dataverse.files.globusm.globus-token", + "NzM2NTQxMDMtOTg1Yy00NDgzLWE1MTYtYTJlNDk0ZmI3MDhkOkpJZGZaZGxMZStQNUo3MTRIMDY2cDh6YzIrOXI2RmMrbFR6UG0zcSsycjA9"); + System.setProperty("dataverse.files.globusm.remote-store-name", "GlobusEndpoint1"); + System.setProperty("dataverse.files.globusm.type", "globus"); + System.setProperty("dataverse.files.globusm.managed", "true"); + System.setProperty("dataverse.files.globusm.base-store", "base"); + System.setProperty("dataverse.files.globusm.label", "globusManaged"); + + // Remote Store + System.setProperty("dataverse.files.globusr.type", "globus"); + System.setProperty("dataverse.files.globusr.base-store", "base"); + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty( + "dataverse.files.globusr." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.base.type"); + System.clearProperty("dataverse.files.base.label"); + System.clearProperty("dataverse.files.base.directory"); + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.globus-token"); + System.clearProperty("dataverse.files.globusm.remote-store-name"); + System.clearProperty("dataverse.files.globusm.type"); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusm.base-store"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty("dataverse.files.globusr.type"); + System.clearProperty("dataverse.files.globusr.base-store"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + System.clearProperty("dataverse.files.globusr.remote-store-name"); + } + + @Test + void testGlobusOverlayIdentifiers() throws IOException { + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusr", + rDatafile.getStorageIdentifier().replace("hdc1", ""))); + + // We can read the storageIdentifier and get the driver + assertTrue(mDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(mDatafile.getStorageIdentifier()))); + assertTrue(rDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(rDatafile.getStorageIdentifier()))); + + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globusm").equals(System.getProperty("dataverse.files.globusm.type"))); + assertTrue(DataAccess.getDriverType("globusr").equals(System.getProperty("dataverse.files.globusr.type"))); + + // When we get a StorageIO for the file, it is the right type + StorageIO mStorageIO = DataAccess.getStorageIO(mDatafile); + assertTrue(mStorageIO instanceof GlobusOverlayAccessIO); + StorageIO rStorageIO = DataAccess.getStorageIO(rDatafile); + assertTrue(rStorageIO instanceof GlobusOverlayAccessIO); + + // When we use it, we can get properties like the remote store name + assertTrue(mStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusm.remote-store-name"))); + assertTrue(rStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusr.remote-store-name"))); + + // Storage Locations are correct + String 
mLocation = mStorageIO.getStorageLocation(); + assertEquals("globusm:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + + "/" + baseStoreId1, mLocation); + String rLocation = rStorageIO.getStorageLocation(); + assertEquals("globusr://" + baseStoreId2 + "//" + logoPath, rLocation); + + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId1 + ".auxobject").toString()); + System.out.println(mStorageIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId1 + ".auxobject").equals(mStorageIO.getAuxObjectAsPath("auxobject"))); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId2 + ".auxobject").equals(rStorageIO.getAuxObjectAsPath("auxobject"))); + } +} From 865c9feb4230a0a3bc9880cb6088a563b3fe21fc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:53:35 -0500 Subject: [PATCH 362/546] getConfig tests --- .../iq/dataverse/dataaccess/StorageIOTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java index 2ed9d18036d..84a241b90f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java @@ -243,4 +243,16 @@ public void testGenerateVariableHeader() { assertEquals("Random Random\n", instance.generateVariableHeader(dvs)); assertEquals(null, instance.generateVariableHeader(null)); } + + @Test + public void testGetConfigParam() { + System.setProperty("dataverse.files.globus.type", "globus"); + assertEquals("globus", StorageIO.getConfigParamForDriver("globus", StorageIO.TYPE)); + System.clearProperty("dataverse.files.globus.type"); + } + + @Test + public void testGetConfigParamWithDefault() { + assertEquals(DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER, StorageIO.getConfigParamForDriver("globus", AbstractRemoteOverlayAccessIO.BASE_STORE, DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER)); + } } From cb1beaae490126c2274219dfcb4cae56094b096a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 14:11:15 -0500 Subject: [PATCH 363/546] finish changing minio secret key #6783 This should have been part of 811d79a7 --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e68215d53d2..5265a6b7c2d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -42,7 +42,7 @@ services: -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) From 5b7a560a380db12d083e82a19a865eb79559e0a4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 14:41:31 -0500 Subject: [PATCH 364/546] refactor, test for getFileMap --- .../harvard/iq/dataverse/api/Datasets.java | 3 +- .../AbstractRemoteOverlayAccessIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 134 +++++++++--------- 
.../iq/dataverse/globus/GlobusUtil.java | 33 +++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 1 - .../iq/dataverse/globus/GlobusUtilTest.java | 88 ++++++++++++ 6 files changed, 190 insertions(+), 71 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 939ebf1dcd4..b3bfc476423 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -110,6 +110,7 @@ import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import java.io.IOException; import java.io.InputStream; @@ -3996,7 +3997,7 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat } } // Allowed to download all requested files - JsonObject files = globusService.getFilesMap(dataFiles, dataset); + JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset); if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { // If managed, give the principal read permissions int status = globusService.setPermissionForDownload(dataset, body.getString("principal")); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 8d058b7c9e3..6c26502acfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -42,7 +42,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; protected static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 37959188857..8cc8e491416 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -159,9 +159,11 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } - /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + /** + * Request read/write access for the specified principal and generate a list of + * accessible paths for new files for the specified dataset. 
* - * @param principal - the id of the Globus principal doing the transfer + * @param principal - the id of the Globus principal doing the transfer * @param dataset * @param numberOfPaths - how many files are to be transferred * @return @@ -230,10 +232,15 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } - /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + /** + * Given an array of remote files to be referenced in the dataset, create a set + * of valid storage identifiers and return a map of the remote file paths to + * storage identifiers. * * @param dataset - * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @param referencedFiles - a JSON array of remote files to be referenced in the + * dataset - each should be a string with the /path/to/file * @return - a map of supplied paths to valid storage identifiers */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { @@ -262,15 +269,17 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } - - /** A cache of temporary permission requests - for upload (rw) and download (r) access. - * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. - * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) + /** + * A cache of temporary permission requests - for upload (rw) and download (r) + * access. When a temporary permission request is created, it is added to the + * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, + * the permission will be revoked/deleted. (If a transfer has been started, the + * permission will not be revoked/deleted until the transfer is complete. This + * is handled in other methods.) */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() - .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire logger.fine("Rule " + ruleId + " expired"); @@ -280,20 +289,24 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref .build(); - //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests + // Convenience method to add a temporary permission request to the cache - + // allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } -/** Call the Globus API to get info about the transfer. 
- * - * @param accessToken - * @param taskId - the Globus task id supplied by the user - * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) - * @return - * @throws MalformedURLException - */ + /** + * Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log + * files are created in general, some calls may use the + * class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -313,9 +326,12 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } - /** Globus call to get an access token for the user using the long-term token we hold. + /** + * Globus call to get an access token for the user using the long-term token we + * hold. * - * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of + * the : * @return - a valid Globus access token */ public static AccessToken getClientToken(String globusBasicToken) { @@ -433,7 +449,6 @@ static class MakeRequestResponse { } - /** * Cache of open download Requests This cache keeps track of the set of files * selected for transfer out (download) via Globus. It is a means of @@ -480,10 +495,11 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - /** Generated the App URl for upload (in) or download (out) + /** + * Generated the App URl for upload (in) or download (out) * - * @param d - the dataset involved - * @param upload - boolean, true for upload, false for download + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download * @param dataFiles - a list of the DataFiles to be downloaded * @return */ @@ -516,7 +532,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); - } @Asynchronous @@ -608,8 +605,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S rulesCache.invalidate(ruleId); } } - - //Wait before first check + + // Wait before first check Thread.sleep(5000); // globus task status check task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); @@ -907,8 +904,8 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - - //Transfer is done (success or failure) so delete the rule + + // Transfer is done (success or failure) so delete the rule if (ruleId != null) { logger.info("Deleting: rule: " + ruleId); deletePermission(ruleId, dataset, globusLogger); @@ -1150,13 +1147,14 @@ private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { return endpoint; } - + // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. 
We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, boolean doNotSaveGuestbookResponse) { + public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, + boolean doNotSaveGuestbookResponse) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); @@ -1170,7 +1168,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, apiToken = new ApiToken(); apiToken.setTokenString(privUrl.getToken()); } - + DataFile df = guestbookResponse.getDataFile(); if (df != null) { logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); @@ -1179,35 +1177,35 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, if (!doNotSaveGuestbookResponse) { fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); } - PrimeFaces.current() - .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); } else { - //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + // Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); List selectedFiles = new ArrayList(); for (String idAsString : list) { try { Long fileId = Long.parseLong(idAsString); - // If we need to create a GuestBookResponse record, we have to - // look up the DataFile object for this file: - if (!doNotSaveGuestbookResponse) { - df = dataFileService.findCheapAndEasy(fileId); - guestbookResponse.setDataFile(df); - fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); - selectedFiles.add(df); - } + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } } catch (NumberFormatException nfe) { - logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + logger.warning( + "A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + + idAsString); return; } } if (!selectedFiles.isEmpty()) { - //Use dataset from one file - files should all be from the same dataset - PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, - selectedFiles)); + // Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, selectedFiles)); } } - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java new file mode 100644 index 00000000000..92cf8ac7704 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.List; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; 
+import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +public class GlobusUtil { + + public static JsonObject getFilesMap(List dataFiles, Dataset d) { + JsonObjectBuilder filesBuilder = Json.createObjectBuilder(); + for (DataFile df : dataFiles) { + String storageId = df.getStorageIdentifier(); + String[] parts = DataAccess + .getDriverIdAndStorageLocation(DataAccess.getLocationFromStorageId(storageId, d)); + String driverId = parts[0]; + String fileLocation = parts[1]; + if (GlobusAccessibleStore.isDataverseManaged(driverId)) { + String endpointWithBasePath = GlobusAccessibleStore.getTransferEnpointWithPath(driverId); + fileLocation = endpointWithBasePath + "/" + fileLocation; + } else { + fileLocation = storageId.substring(storageId.lastIndexOf("//") + 2); + } + filesBuilder.add(df.getId().toString(), fileLocation); + } + return filesBuilder.build(); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 792a9974076..856d71d7dc0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -6,7 +6,6 @@ import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; import org.junit.jupiter.api.AfterEach; diff --git a/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java new file mode 100644 index 00000000000..56f8731b9c8 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java @@ -0,0 +1,88 @@ +package edu.harvard.iq.dataverse.globus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonObject; + +public class GlobusUtilTest { + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusm.managed", "true"); + + // Remote Store + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + List files = new ArrayList(); + files.add(mDatafile); + files.add(rDatafile); + dataset.setFiles(files); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + } + + + @Test + public void testgetFilesMap() { + + JsonObject jo = GlobusUtil.getFilesMap(dataset.getFiles(), dataset); + System.out.println(JsonUtil.prettyPrint(jo)); + assertEquals(jo.getString(Long.toString(mDatafile.getId())), "d7c42580-6538-4605-9ad8-116a61982644/hdc1/10.5072/F2ABCDEF/182ad2bda2f-c3508e719076"); + assertEquals(jo.getString(Long.toString(rDatafile.getId())), logoPath); + } +} From 4ba629d643678acdd0b649128b8a76a805ee6906 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 15:28:32 -0500 Subject: [PATCH 365/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index d0fe895565c..38b99e6580b 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -11,6 +11,10 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios +### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started +Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). + The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + ### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). 
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -142,7 +146,7 @@ life easier during instance setups and reconfiguration. You no longer need to ge necessary JSON file. ### Adding PKCE Support - +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) From 93d9b35a07625622523a4490eee8f55d617defec Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:17 -0500 Subject: [PATCH 366/546] future test code - requires config of Globus stores --- .../harvard/iq/dataverse/api/DatasetsIT.java | 53 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 32 +++++++++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 34 ++++++------ 3 files changed, 104 insertions(+), 15 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6a746b7c5b5..928574eb82b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -42,6 +42,9 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import org.apache.commons.lang3.StringUtils; @@ -135,6 +138,7 @@ public static void setUpClass() { .statusCode(200); */ } + @AfterAll public static void afterClass() { @@ -4175,4 +4179,53 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + //Requires that a Globus remote store be set up as with the parameters in the GlobusOverlayAccessIOTest class + //Tests whether the API call succeeds and has some of the expected parameters + @Test + @Disabled + public void testGetGlobusUploadParameters() { + //Creates managed and remote Globus stores + GlobusOverlayAccessIOTest.setUp(); + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + 
createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); + + Response setDriver = UtilIT.setDatasetStorageDriver(datasetId, System.getProperty("dataverse.files.globusr.label"), apiToken); + assertEquals(200, setDriver.getStatusCode()); + + Response getUploadParams = UtilIT.getDatasetGlobusUploadParameters(datasetId, "en_us", apiToken); + assertEquals(200, getUploadParams.getStatusCode()); + JsonObject data = JsonUtil.getJsonObject(getUploadParams.getBody().asString()); + JsonObject queryParams = data.getJsonObject("queryParameters"); + assertEquals("en_us", queryParams.getString("dvLocale")); + assertEquals("false", queryParams.getString("managed")); + //Assumes only one reference endpoint with a basepath is configured + assertTrue(queryParams.getJsonArray("referenceEndpointsWithPaths").get(0).toString().indexOf(System.getProperty("dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS)) > -1); + JsonArray signedUrls = data.getJsonArray("signedUrls"); + boolean found = false; + for (int i = 0; i < signedUrls.size(); i++) { + JsonObject signedUrl = signedUrls.getJsonObject(i); + if (signedUrl.getString("name").equals("requestGlobusReferencePaths")) { + found=true; + break; + } + } + assertTrue(found); + //Removes managed and remote Globus stores + GlobusOverlayAccessIOTest.tearDown(); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 869e755a183..bd2fe7e6f0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3636,4 +3636,36 @@ static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); } + static Response setDatasetStorageDriver(Integer datasetId, String driverLabel, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(driverLabel) + .put("/api/datasets/" + datasetId + "/storageDriver"); + } + + + //Globus Store related - not currently used + + static Response getDatasetGlobusUploadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusUploadParameters?locale=" + locale); + } + + static Response getDatasetGlobusDownloadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusDownloadParameters?locale=" + locale); + } + + static Response requestGlobusDownload(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 856d71d7dc0..1c84fa90a9e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -8,8 +8,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import static org.junit.jupiter.api.Assertions.*; @@ -35,8 +36,8 @@ public class GlobusOverlayAccessIOTest { private String authority = "10.5072"; private String identifier = "F2ABCDEF"; - @BeforeEach - public void setUp() { + @BeforeAll + public static void setUp() { // Base Store System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.base.label", "default"); @@ -65,20 +66,11 @@ public void setUp() { "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - mDatafile = MocksFactory.makeDataFile(); - mDatafile.setOwner(dataset); - mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); - rDatafile = MocksFactory.makeDataFile(); - rDatafile.setOwner(dataset); - rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); } - @AfterEach - public void tearDown() { + @AfterAll + public static void tearDown() { System.clearProperty("dataverse.files.base.type"); System.clearProperty("dataverse.files.base.label"); System.clearProperty("dataverse.files.base.directory"); @@ -100,6 +92,18 @@ public void tearDown() { @Test void testGlobusOverlayIdentifiers() throws IOException { + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); From 12b7c306dd31ebd987a2bae5f36dae27e4f0ba56 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:24 -0500 Subject: [PATCH 367/546] typo --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 1c84fa90a9e..ad980aa28cd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -61,7 +61,7 @@ public static void setUp() { System.setProperty("dataverse.files.globusr.type", "globus"); 
System.setProperty("dataverse.files.globusr.base-store", "base"); System.setProperty("dataverse.files.globusr.managed", "false"); - System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty("dataverse.files.globusr.label", "globusRemote"); System.setProperty( "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); From 1426dfb6fc52ace869e3c822a732d5b408ca7c4c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:47:54 -0500 Subject: [PATCH 368/546] add missing setting to release notes, add a todo to use two delays --- doc/release-notes/10162-globus-support.md | 7 ++++++- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index d64e72b70a1..7bc3990f840 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,4 +1,6 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, +and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) - Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. - The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) @@ -10,5 +12,8 @@ Backward Incompatibilities: New JVM Options: - A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. 
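For a rough picture of how this option is consumed, the permission-rule cache touched earlier in this series reads the value through the JvmSettings enum and hands it to Caffeine as an expire-after-write duration. The sketch below mirrors that pattern; it assumes the Dataverse classpath (JvmSettings, Caffeine) and is illustrative rather than the exact production code:

```java
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.logging.Logger;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalCause;
import com.github.benmanes.caffeine.cache.Scheduler;

import edu.harvard.iq.dataverse.settings.JvmSettings;

public class GlobusRuleCacheSketch {

    private static final Logger logger = Logger.getLogger(GlobusRuleCacheSketch.class.getName());

    // Sketch: temporary Globus permission rules keyed by rule id, mapped to dataset id.
    // Entries expire after dataverse.files.globus-cache-maxage minutes; at that point a
    // real implementation would revoke/delete the corresponding Globus permission.
    private final Cache<String, Long> rulesCache = Caffeine.newBuilder()
            .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES))
            .scheduler(Scheduler.systemScheduler())
            .evictionListener((String ruleId, Long datasetId, RemovalCause cause) ->
                    logger.fine("Rule " + ruleId + " for dataset " + datasetId + " expired: " + cause))
            .build();
}
```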
+ + Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8cc8e491416..d0660a55a6a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -277,6 +277,8 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref * permission will not be revoked/deleted until the transfer is complete. This * is handled in other methods.) */ + // ToDo - nominally this doesn't need to be as long as the allowed time for the + // downloadCache so there could be two separate settings. // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From d2427bd39046f104c95e27d1869d1665b969724f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 09:49:52 -0500 Subject: [PATCH 369/546] #10151 incorporate recent additions --- doc/release-notes/6.1-release-notes.md | 22 +++++++++++++++++++++ doc/release-notes/8549-collection-quotas.md | 3 --- doc/release-notes/8760-bagit.md | 15 -------------- 3 files changed, 22 insertions(+), 18 deletions(-) delete mode 100644 doc/release-notes/8549-collection-quotas.md delete mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38b99e6580b..38a7a1064e6 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -30,6 +30,28 @@ With the upload-out-of-band option enabled, it is also possible for file upload Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +### Collection Storage Size Quota Support +-This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 + +### BagIT Export Configurations Updated +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. 
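Because these are MPCONFIG options, they can be resolved through the standard MicroProfile Config API. The snippet below is a hypothetical sketch, not the actual BagIt export code; the property keys come from the list above, and the fallback strings are invented placeholders:

```java
import org.eclipse.microprofile.config.Config;
import org.eclipse.microprofile.config.ConfigProvider;

public class BagInfoConfigSketch {

    // Sketch: resolve the bag-info.txt source-organization fields from MPCONFIG,
    // falling back to placeholder values when the options are not set.
    public static String[] resolveSourceOrg() {
        Config config = ConfigProvider.getConfig();
        String name = config.getOptionalValue("dataverse.bagit.sourceorg.name", String.class)
                .orElse("Example Organization");   // placeholder fallback
        String address = config.getOptionalValue("dataverse.bagit.sourceorg.address", String.class)
                .orElse("123 Example Street");     // placeholder fallback
        String email = config.getOptionalValue("dataverse.bagit.sourceorg.email", String.class)
                .orElse("support@example.org");    // placeholder fallback
        return new String[] { name, address, email };
    }
}
```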
+ +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt + + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md deleted file mode 100644 index b3635d0c5a1..00000000000 --- a/doc/release-notes/8549-collection-quotas.md +++ /dev/null @@ -1,3 +0,0 @@ -This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 - diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md deleted file mode 100644 index 30601857309..00000000000 --- a/doc/release-notes/8760-bagit.md +++ /dev/null @@ -1,15 +0,0 @@ -For BagIT export, it is now possible to configure the following information in bag-info.txt... - -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu - -... using new JVM/MPCONFIG options: - -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email - -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. - -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt From 05c53066ea26c809b6376051ff336f11a4bcee9d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:29:47 -0500 Subject: [PATCH 370/546] mention download tmp file API #10151 --- doc/release-notes/6.1-release-notes.md | 1 + doc/release-notes/8760-download-tmp-file.md | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..1b4e884cded 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -73,6 +73,7 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. 
+- downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md deleted file mode 100644 index 7623a91ac9a..00000000000 --- a/doc/release-notes/8760-download-tmp-file.md +++ /dev/null @@ -1,3 +0,0 @@ -A new API has been added for testing purposes that allows files to be downloaded from /tmp. - -See From 97c33218fa7224c544657e72f52c27d9cd8951bf Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:30:23 -0500 Subject: [PATCH 371/546] remove duplicate "new" heading in API changelog #10151 --- doc/sphinx-guides/source/api/changelog.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d2908533a14..910134e14f3 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -12,9 +12,6 @@ New ~~~ - **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. - **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. - -New -~~~ - **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. - **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. From 3a13ac8c56385ed2cc82bcc9db4f57fea7688a67 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:34:21 -0500 Subject: [PATCH 372/546] #10151 add upgrade instructions --- doc/release-notes/6.1-release-notes.md | 81 +++++++++++++++++++ .../9002_allow_direct_upload_setting.md | 5 -- 2 files changed, 81 insertions(+), 5 deletions(-) delete mode 100644 doc/release-notes/9002_allow_direct_upload_setting.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..d5972338124 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,6 +51,13 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +### Direct Upload setting added +A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. + +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
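As a reminder of what that default path looks like in practice, here is a rough RestAssured-style sketch in the spirit of the UtilIT helpers elsewhere in this series. The server URL, API token, dataset id, and jsonData payload are placeholders, and the call shown is the long-standing native add-file endpoint rather than anything introduced by these patches:

```java
import static io.restassured.RestAssured.given;

import java.io.File;

import io.restassured.response.Response;

public class AddFileSketch {

    // Sketch: add a local file to a dataset via the native API linked above.
    // serverUrl, apiToken, datasetId, and descriptionJson are caller-supplied placeholders.
    public static Response addFileToDataset(String serverUrl, String apiToken, int datasetId,
                                            String pathToFile, String descriptionJson) {
        return given()
                .header("X-Dataverse-key", apiToken)
                .multiPart("file", new File(pathToFile))
                .multiPart("jsonData", descriptionJson)
                .post(serverUrl + "/api/datasets/" + datasetId + "/add");
    }
}
```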
+ +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -138,6 +145,7 @@ to generate updated versions. See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -214,6 +222,79 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +0\. These instructions assume that you are upgrading from 6.0. If you are running an earlier version, the only safe way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to 5.14. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara6` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin undeploy dataverse-6.0` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-6.1.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. 
Update Geospatial Metadata Block (to improve validation of bounding box values) + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + +6a\. Update Citation Metadata Block (to make Alternative Title repeatable) + +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` + +7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +7a\. For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + +- Start Solr instance (usually `service solr start`, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- There are 2 ways to regenerate the schema: Either by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed): +``` + wget https://raw.githubusercontent.com/IQSS/dataverse/master/conf/solr/9.3.0/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml +``` +OR, alternatively, you can edit the following line in your schema.xml by hand as follows (to indicate that alternative title is now `multiValued="true"`): +``` + +``` + +- Restart Solr instance (usually `service solr restart` depending on solr/OS) + +8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). + + ## Backward Incompatibilities - Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, diff --git a/doc/release-notes/9002_allow_direct_upload_setting.md b/doc/release-notes/9002_allow_direct_upload_setting.md deleted file mode 100644 index 1e76ed4ad47..00000000000 --- a/doc/release-notes/9002_allow_direct_upload_setting.md +++ /dev/null @@ -1,5 +0,0 @@ -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
- -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. From a78213633e6f5bf345d1aedf4328eee5ee231ffb Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:43:40 -0500 Subject: [PATCH 373/546] #10151 remove notes previously incorporated --- .../9547-validation-for-geospatial-metadata.md | 9 --------- doc/release-notes/9859-ORE and Bag updates.md | 14 -------------- 2 files changed, 23 deletions(-) delete mode 100644 doc/release-notes/9547-validation-for-geospatial-metadata.md delete mode 100644 doc/release-notes/9859-ORE and Bag updates.md diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md deleted file mode 100644 index a44e1a3732b..00000000000 --- a/doc/release-notes/9547-validation-for-geospatial-metadata.md +++ /dev/null @@ -1,9 +0,0 @@ -Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - -For the "upgrade" steps section: - -Update Geospatial Metadata Block - -- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` - diff --git a/doc/release-notes/9859-ORE and Bag updates.md b/doc/release-notes/9859-ORE and Bag updates.md deleted file mode 100644 index dd3ae3bbbe1..00000000000 --- a/doc/release-notes/9859-ORE and Bag updates.md +++ /dev/null @@ -1,14 +0,0 @@ -Dataverse's OAI_ORE Metadata Export format and archival BagIT exports -(which include the OAI-ORE metadata export file) have been updated to include -information about the dataset version state, e.g. RELEASED or DEACCESSIONED -and to indicate which version of Dataverse was used to create the archival Bag. -As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 -version designation and it is expected that any future changes to the OAI_ORE export -format will result in a version change and that tools such as DVUploader that can -recreate datasets from archival Bags will start indicating which version(s) of the -OAI_ORE format they can read. - -Dataverse installations that have been using archival Bags may wish to update any -existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) -to generate updated versions. 
\ No newline at end of file From b517f6e0fca1802faa4455522a72e711963714ba Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:53:07 -0500 Subject: [PATCH 374/546] #10151 S3 test notes --- doc/release-notes/6.1-release-notes.md | 2 ++ doc/release-notes/6783-s3-tests.md | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 doc/release-notes/6783-s3-tests.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 9a35a31a734..375717ab9c9 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -163,6 +163,8 @@ For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.ht - `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. ## OpenID Connect Authentication Provider Improvements diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md deleted file mode 100644 index 1b9bb400cc6..00000000000 --- a/doc/release-notes/6783-s3-tests.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. - -In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. From 07a8659b60acdb766fb5a4742cf4ac4537e34615 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 14:24:24 -0500 Subject: [PATCH 375/546] #10151 remove duplicate release note out of band setting previously added --- doc/release-notes/6.1-release-notes.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 375717ab9c9..b6bb7d8b806 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,14 +51,6 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt -### Direct Upload setting added -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. 
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). - -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. - - ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output From ed5b0dbde90fd4b8592aa2bdce7ae205482063c8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Dec 2023 15:44:18 -0500 Subject: [PATCH 376/546] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/release-notes/10162-globus-support.md | 2 +- doc/sphinx-guides/source/developers/big-data-support.rst | 4 ++-- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index 7bc3990f840..60670b5b101 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,7 +1,7 @@ Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. 
- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) - The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index fe49f9f6150..8d891e63317 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,7 +149,7 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured. Globus endpoints can be in a variety of places, from data centers to personal computers. @@ -168,7 +168,7 @@ Dataverse-managed endpoints must be Globus 'guest collections' hosted on either S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). -With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionlity related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionlity normally associated with direct uploads to S3 is available.) +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). 
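For a concrete picture of that configuration, the unit tests in this series stand up a non-managed, reference-style Globus store purely through system properties. The trimmed sketch below copies the fixture values from GlobusOverlayAccessIOTest; the endpoint UUID and base path are nonsense placeholders rather than a usable Globus collection, and a running installation would set the equivalent JVM options through its normal configuration mechanisms instead:

```java
import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO;

public class RemoteGlobusStoreFixtureSketch {

    // Sketch: a reference-only (non-managed) Globus store, configured the way the
    // GlobusOverlayAccessIOTest fixture does it. Assumes a separate "base" store
    // (dataverse.files.base.*) has already been configured, as in the tests.
    public static void configureRemoteGlobusStore() {
        System.setProperty("dataverse.files.globusr.type", "globus");
        System.setProperty("dataverse.files.globusr.label", "globusRemote");
        System.setProperty("dataverse.files.globusr.base-store", "base");
        System.setProperty("dataverse.files.globusr.managed", "false");
        System.setProperty(
                "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS,
                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
        System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp");
    }
}
```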
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 5b2b6982866..37d80d0a6cd 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -71,7 +71,7 @@ The getDatasetMetadata and getFileListing URLs are just signed versions of the s If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are -describe further below. +described further below. The call to set up for a transfer out (download) is similar: From 1d668970df1562c3cbc85d60be2abc55d8a96572 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 15:56:27 -0500 Subject: [PATCH 377/546] #10151 standard guide links --- doc/release-notes/6.1-release-notes.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b6bb7d8b806..24194a02026 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -49,14 +49,14 @@ Organization-Email: support@dataverse.harvard.edu Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. ### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). @@ -128,13 +128,13 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. 
-- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) From 85206de08acb6a8373199fb0d4eec2768cb6763d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Dec 2023 15:59:21 -0500 Subject: [PATCH 378/546] simply API changelog to be about breaking changes only #10151 --- doc/release-notes/6.1-release-notes.md | 2 +- doc/sphinx-guides/source/api/changelog.rst | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 24194a02026..a3b04749d68 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -134,7 +134,7 @@ to generate updated versions. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html +- We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. 
The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 910134e14f3..20225b99b5c 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -1,5 +1,7 @@ -API Changelog -============= +API Changelog (Breaking Changes) +================================ + +This API changelog is experimental and we would love feedback on its usefulness. Its primary purpose is to inform API developers of any breaking changes. (We try not ship any backward incompatible changes, but it happens.) To see a list of new APIs and backward-compatible changes to existing API, please see each version's release notes at https://github.com/IQSS/dataverse/releases .. contents:: |toctitle| :local: @@ -8,20 +10,9 @@ API Changelog v6.1 ---- -New -~~~ -- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. -- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. -- **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. -- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. - -Changes -~~~~~~~ -- **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. +- The metadata field "Alternative Title" now supports multiple values so you must pass an array rather than a string when populating that field via API. See https://github.com/IQSS/dataverse/pull/9440 v6.0 ---- -Changes -~~~~~~~ - **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. See :doc:`dataaccess`. From 0cd87d167211ee6bc047de3cba3e79acfb520e28 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Dec 2023 16:37:03 -0500 Subject: [PATCH 379/546] address Review comments --- .../source/admin/integrations.rst | 12 ++++++++ doc/sphinx-guides/source/api/intro.rst | 4 +++ .../source/developers/globus-api.rst | 6 ++-- .../source/installation/config.rst | 9 +++--- .../edu/harvard/iq/dataverse/DatasetPage.java | 29 ------------------- .../AbstractRemoteOverlayAccessIO.java | 2 +- 6 files changed, 25 insertions(+), 37 deletions(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 9a24cf0715c..db566106b49 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -121,6 +121,18 @@ Its goal is to make the dashboard adjustable for a Dataverse installation's need The integrations dashboard is currently in development. 
A preview and more information can be found at: `rdm-integration GitHub repository `_ +Globus +++++++ + +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation + +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. + + Embedding Data on Websites -------------------------- diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 6c61bb8c20d..8eb11798dd7 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -187,6 +187,10 @@ Lists of Dataverse APIs - Files - etc. +- :doc:`/developers/dataset-semantic-metadata-api`: For creating, reading, editing, and deleting dataset metadata using JSON-LD. +- :doc:`/developers/dataset-migration-api`: For migrating datasets from other repositories while retaining the original persistent identifiers and publication date. +- :doc:`/developers/s3-direct-upload-api`: For the transfer of larger files/larger numbers of files directly to an S3 bucket managed by Dataverse. +- :doc:`/developers/globus-api`: For the Globus transfer of larger files/larger numbers of files directly via Globus endpoints managed by Dataverse or referencing files in remote endpoints. - :doc:`metrics`: For query statistics about usage of a Dataverse installation. - :doc:`sword`: For depositing data using a standards-based approach rather than the :doc:`native-api`. 
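Complementing the remote sketch above, and as background for the integrations.rst text about configuring "a Globus accessible store", a managed Globus store is configured with the Globus credentials of the managing account and the managed endpoint. This is a hedged sketch only: all values are placeholders, `globus-token` and `files-not-accessible-by-dataverse` appear in the store options table later in this series, and the `transfer-endpoint-with-basepath` key is inferred from the `TRANSFER_ENDPOINT_WITH_BASEPATH` constant used in `GlobusAccessibleStore.java`, so verify it against the published table.

.. code-block:: bash

    # Sketch only: a managed Globus store (file-based guest collection)
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.type=globus"
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.label=GlobusManaged"
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.base-store=file"
    # base64-encoded clientid:secret; the docs recommend a MicroProfile alias rather than a literal value
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.globus-token=BASE64_ENCODED_CREDENTIALS"
    # managed endpoint id and optional base path (option name inferred, see note above)
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.transfer-endpoint-with-basepath=d8c42580-6528-4605-9ad8-116a61982644/dataverse"
    # true for file-based managed endpoints, false for S3 Connector-based ones, per the options table
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.files-not-accessible-by-dataverse=true"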
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 37d80d0a6cd..de9df06a798 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -160,11 +160,11 @@ In the managed case, once a Globus transfer has been initiated a final API call export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + export JSON_DATA='{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ - {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 4540219fc7c..f6c05a3bde8 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -499,8 +499,8 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores --------------------------------------------------------------------------------------------------------------------------- +File Storage +------------ By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. @@ -999,7 +999,8 @@ See :doc:`/developers/big-data-support` for additional information on how to use In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. 
-There are two types of Globus stores +There are two types of Globus stores: + - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse - remote - where Dataverse references files that remain on trusted remote Globus endpoints @@ -1024,7 +1025,7 @@ Once you have configured a globus store, it is recommended that you install the dataverse.files..globus-token A Globus token (base64 endcoded : for a managed store) - using a microprofile alias is recommended (none) dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) - dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be false for S3 Connector-based *managed* stores, true for others ``false`` ======================================================= ================== ========================================================================== =================== diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 9c7d599ba33..b79f387f20b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5312,36 +5312,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } -/* These appear to be unused - toDo - delete - private Boolean downloadButtonAllEnabled = null; - public boolean isDownloadAllButtonEnabled() { - - if (downloadButtonAllEnabled == null) { - for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (!this.fileDownloadHelper.canDownloadFile(fmd)) { - downloadButtonAllEnabled = false; - break; - } - } - downloadButtonAllEnabled = true; - } - return downloadButtonAllEnabled; - } - - public boolean isDownloadSelectedButtonEnabled(){ - - if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){ - return false; - } - for (FileMetadata fmd : this.selectedFiles){ - if (this.fileDownloadHelper.canDownloadFile(fmd)){ - return true; - } - } - return false; - } -*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 6c26502acfa..10ff68a56f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -50,7 +50,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends protected static final String REMOTE_STORE_URL = "remote-store-url"; // Whether Dataverse can access the file bytes - //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; protected StorageIO baseStore = null; From 9dd3f9785c6a5c8939bd9f023400f5f10c3ef58d Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 11 Dec 2023 
09:28:16 +0000 Subject: [PATCH 380/546] Added: release notes for #10155 --- .../10155-datasets-can-download-at-least-one-file.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10155-datasets-can-download-at-least-one-file.md diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md new file mode 100644 index 00000000000..566d505f7ca --- /dev/null +++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md @@ -0,0 +1,3 @@ +The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created. + +This endpoint reports whether the calling user can download at least one file of a particular dataset version. From 9fb44d3d45080a2e5c9de15ab0445cc052c956b3 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 11 Dec 2023 09:33:56 +0000 Subject: [PATCH 381/546] Added: docs for #10155 --- doc/sphinx-guides/source/api/native-api.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 56190dd342c..99438520120 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2686,6 +2686,19 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" +Know if a User can download at least one File from a Dataset Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call reports whether the calling user can download at least one file of a dataset version. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=24 + export VERSION=1.0 + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/versions/$VERSION/canDownloadAtLeastOneFile" + Files ----- From ca706662cd9f19b36d31530cf2747d810923ca3e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 11 Dec 2023 11:06:36 -0500 Subject: [PATCH 382/546] bug fix - allowing S3 w/Globus config to work for download --- .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index e4d062f0619..8bed60d8302 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -65,7 +65,11 @@ public static String getGlobusToken(String storeId) { } public static boolean isGlobusAccessible(String storeId) { - if(StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) { + String type = StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE); + if (type.equals(DataAccess.GLOBUS)) { + return true; + } else if (type.equals(DataAccess.S3) + && StorageIO.getConfigParamForDriver(storeId, TRANSFER_ENDPOINT_WITH_BASEPATH) != null) { + return true; + } return false; From 09a227b30a2b5da05829297a9173952596e2df9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 11 Dec 2023 11:12:04 -0500 Subject: [PATCH 383/546] Change docs to make clear that an S3 store can be used --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f6c05a3bde8..a7d7905ca4a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1004,10 +1004,10 @@ There are two types of Globus stores: - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse - remote - where Dataverse references files that remain on trusted remote Globus endpoints -For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector. +A managed Globus store connects to a standard/file-based Globus endpoint. It is also possible to configure an S3 store as a managed store, if the managed endpoint uses an underlying S3 store via the Globus S3 Connector. With the former, Dataverse has no direct access to the file contents and functionality related to ingest, fixity hash validation, etc. are not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. -Once you have configured a globus store, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support. +Once you have configured a globus store, or configured an S3 store for Globus access, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initiated via the Dataverse user interface. Alternatively, you can point your users to the :doc:`/developers/globus-api` for information about API support. .. table:: :align: left From 44bd5b7fb6d697d356d857a73847e1637aaa5763 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:19:46 -0500 Subject: [PATCH 384/546] add perf test results --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index a3b04749d68..b03a7a62baa 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -258,7 +258,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -298,6 +298,10 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub.
+## Performance Testing Results +The results of performance testing can be found here: +https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 + ## Getting Help For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 173b8a7a067b392de8e1c900c3e1d9eb806c71d6 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:25:44 -0500 Subject: [PATCH 385/546] fix backward comp Alternative Title --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b03a7a62baa..5bc0df4640c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -291,7 +291,7 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1959f2ff22d9bbc4290a586fc49f1f49eccdbd04 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:29:24 -0500 Subject: [PATCH 386/546] removed unneeded header --- doc/release-notes/6.1-release-notes.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 5bc0df4640c..6d3d1912f81 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -6,11 +6,8 @@ This release brings new features, enhancements, and bug fixes to the Dataverse s Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. ## Release Highlights (Major Upgrades, Breaking Changes) - This release contains major upgrades to core components. Detailed upgrade instructions can be found below. -## Detailed Release Highlights, New Features and Use Case Scenarios - ### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
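Because the guestbook-at-request option comes up repeatedly in these notes, a one-line sketch of enabling the global default may be useful; this assumes the standard JVM-option mechanism, and the collection- and dataset-level overrides described above are still set separately via the UI or API.

.. code-block:: bash

    # Sketch only: show guestbooks when access is requested instead of at download time
    ./asadmin create-jvm-options "-Ddataverse.files.guestbook-at-request=true"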
From a4e25e17155896ae5c335ea8169229f248eaf22b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 12:15:56 -0500 Subject: [PATCH 387/546] reorg 6.1 release notes, add globus #10151 --- doc/release-notes/6.1-release-notes.md | 262 +++++++++++++------------ 1 file changed, 137 insertions(+), 125 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 6d3d1912f81..475d4fc0887 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -5,57 +5,96 @@ Please note: To read these instructions in full, please go to https://github.com This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. -## Release Highlights (Major Upgrades, Breaking Changes) -This release contains major upgrades to core components. Detailed upgrade instructions can be found below. +## Release highlights -### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). - The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +### Guestbook at request + +Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` option) to display any configured guestbook to users when they request restricted files (new functionality) or when they download files (previous behavior). + +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. + +### Collection-level storage quotas + +This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. + +### Globus support + +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. 
Please note: + +- Globus functionality remains experimental/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model. +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus). +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM options: +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used + +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) + +### Alternative Title now allows multiple values + +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. + +### External tools: configure tools now available at the dataset level + +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +### S3 out-of-band upload -### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. -### Alternative Title is made repeatable. -- One will need to update database with updated citation block. - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update Solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` - Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +### JSON Schema for datasets -Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -### Collection Storage Size Quota Support --This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 +### OpenID Connect authentication provider improvements -### BagIT Export Configurations Updated -For BagIT export, it is now possible to configure the following information in bag-info.txt... +#### Using MicroProfile Config for provisioning -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. -... 
using new JVM/MPCONFIG options: +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email +#### Adding PKCE Support -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) -For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +### Solr improvements -### Improvements in the dataset versions API -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. +As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. +Please see the "Installing Solr" section of the Installation Prerequisites guide. + +### New release of Dataverse Previewers (including a Markdown previewer) + +Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. Please note: + +- SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. +- Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews + +### New or improved APIs + +The development of a [new UI for Dataverse](https://github.com/IQSS/dataverse-frontend) is driving the addition or improvement of many APIs. + +#### New API endpoints -### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). 
- /api/files/{id}/downloadCount - /api/files/{id}/dataTables @@ -71,7 +110,33 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. - downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. -### Extended the existing endpoints: +#### Pagination of files in dataset versions + +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. + + +#### DataFile API payload has been extended to include the following fields + +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +#### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering + +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + +#### Additional improvements to existing API endpoints + - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. - getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. Added support for filtering by optional criteria query parameter: - contentType @@ -93,25 +158,21 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. 
-### DataFile API payload has been extended to include the following fields: -- tabularData: Boolean field to know if the DataFile is of tabular type -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) -- friendlyType: String +### Improvements for developers -### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering -- Access status: through the `accessStatus` query parameter, which supports the following values: - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (with the Payara Platform Tools plugin). For details, see https://guides.dataverse.org/en/6.1/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. +- With this release, we add a new type of testing to Dataverse: integration tests which are not end-to-end tests (like our API tests). Starting with OIDC authentication support, we test regularly on CI for working condition of both OIDC login options in UI and API. +- The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. +- The support for setting JVM options during testing has been improved for developers. You now may add the `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +## Major use cases and infrastructure enhancements -### Misc -- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. -- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +Changes and fixes in this release not already mentioned above include: + +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. 
This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -125,68 +186,18 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt + - Source-Organization from `dataverse.bagit.sourceorg.name`. + - Organization-Address from `dataverse.bagit.sourceorg.address`. + - Organization-Email from `dataverse.bagit.sourceorg.email`. - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. - - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed.
- -### Solr Improvements -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. - - -### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. -- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. -- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. -- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. - -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. 
- -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options +## New configuration options - dataverse.auth.oidc.enabled - dataverse.auth.oidc.client-id @@ -199,8 +210,24 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age - dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +## Backward incompatibilities + +- Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- See "Globus support" above for backward incompatibilies specific to Globus. + +## Complete list of changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. + ## Installation If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! @@ -209,7 +236,7 @@ Once you are in production, we would be delighted to update our [map of Datavers You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). -## Upgrade Instructions +## Upgrade instructions Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. @@ -241,6 +268,8 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `$PAYARA/bin/asadmin deploy dataverse-6.1.war` +As noted above, deployment of the war file might take several minutes due a database migration script required for the new storage quotas feature. + 5\. 
Restart Payara - `service payara stop` @@ -255,7 +284,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -285,20 +314,3 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as - Restart Solr instance (usually `service solr restart` depending on solr/OS) 8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). - - -## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) -- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, - -## Complete List of Changes - -For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. - -## Performance Testing Results -The results of performance testing can be found here: -https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 - -## Getting Help - -For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 011b9291e6f694631d237bd047c3a170e6e93a2e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 13:58:08 -0500 Subject: [PATCH 388/546] remove globus snippet (already added) #10151 --- doc/release-notes/10162-globus-support.md | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md deleted file mode 100644 index 60670b5b101..00000000000 --- a/doc/release-notes/10162-globus-support.md +++ /dev/null @@ -1,19 +0,0 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, -and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. 
-Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. -- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model -- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) -- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) - -Backward Incompatibilities: -- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism -- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. - -New JVM Options: -- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). -- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. - - - -Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used From 3e32f42959dce41e9c21c9e2285fdf719b048dc0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 14:57:52 -0500 Subject: [PATCH 389/546] link to guides in more places, other tweaks #10151 --- doc/release-notes/6.1-release-notes.md | 43 +++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 475d4fc0887..fab11ce4959 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -13,12 +13,17 @@ Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. +For details, see [dataverse.files.guestbook-at-request](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-guestbook-at-request) and PR #9599. + ### Collection-level storage quotas This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. + Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. -### Globus support +For details, see [Storage Quotas for Collections](https://guides.dataverse.org/en/6.1/admin/collectionquotas.html) in the Admin Guide. + +### Globus support (experimental), continued Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Please note: @@ -32,54 +37,50 @@ Backward incompatibilities: - The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. New JVM options: -- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage](https://guides.dataverse.org/en/6.1/installation/config.html#file-storage) section of the Installation Guide. - dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used -Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) +Further details can be found in the [Big Data Support](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) section of the Developer Guide. ### Alternative Title now allows multiple values -Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below and PR #9440 for details. ### External tools: configure tools now available at the dataset level -Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See [External Tools](https://guides.dataverse.org/en/6.1/admin/external-tools.html#dataset-level-configure-tools) in the Admin Guide and PR #9925. ### S3 out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). 
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### JSON Schema for datasets -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10109. -### OpenID Connect authentication provider improvements +### OpenID Connect (OIDC) improvements #### Using MicroProfile Config for provisioning -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. +With this release it is possible to provision a single OIDC-based authentication provider by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
-If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. +If you are using an external OIDC provider component as an identity management system and/or broker to other authentication providers such as Google, eduGain SAML and so on, this might make your life easier during instance setups and reconfiguration. You no longer need to generate the necessary JSON file. #### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +For both features, see the [OIDC](https://guides.dataverse.org/en/6.0/installation/oidc.html) section of the Installation Guide and PR #9273. ### Solr improvements As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -Please see the "Installing Solr" section of the Installation Prerequisites guide. +Please see the [Installing Solr](https://guides.dataverse.org/en/6.1/installation/prerequisites.html#installing-solr) section of the Installation Guide. ### New release of Dataverse Previewers (including a Markdown previewer) @@ -87,7 +88,7 @@ Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/da - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- There is now a [Markdown (.md)](https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews) previewer. ### New or improved APIs @@ -172,7 +173,7 @@ This parameter applies a filter criteria to the operation and supports the follo Changes and fixes in this release not already mentioned above include: -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. See PR #10142. 
- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -184,7 +185,7 @@ recreate datasets from archival Bags will start indicating which version(s) of t OAI_ORE format they can read. Dataverse installations that have been using archival Bags may wish to update any existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +[archival Bag export API](https://guides.dataverse.org/en/6.1/installation/config.html#bagit-export-api-calls) to generate updated versions. - For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - Source-Organization from `dataverse.bagit.sourceorg.name`. From 92a298da25c03822c848e5a43253f039193665f9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:42:55 -0500 Subject: [PATCH 390/546] add missing new config options and sort #10151 --- doc/release-notes/6.1-release-notes.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index fab11ce4959..1e09a207104 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -200,25 +200,28 @@ See also #10060. ## New configuration options -- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.auth-server-url - dataverse.auth.oidc.client-id - dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.enabled - dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.max-cache-age +- dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title - dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age -- dataverse.files.{driverId}.upload-out-of-band +- dataverse.auth.oidc.title +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.name - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +- dataverse.files.{driverId}.upload-out-of-band ## Backward incompatibilities - Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. -- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new `dataverse.bagit` JVM options mentioned above. 
For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - See "Globus support" above for backward incompatibilies specific to Globus. ## Complete list of changes From 80634c7a59d7bfce4ab0e871d80d34f446579123 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:54:30 -0500 Subject: [PATCH 391/546] address feedback from review #9919 --- doc/sphinx-guides/source/developers/performance.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index aa50cd6e40c..46c152f322e 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -116,12 +116,12 @@ We'd like to rate limit commands (CreateDataset, etc.) so that we can keep them Solr ~~~~ -While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. We are investigating in `#9635 `_. +While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. Improvements were made in `PR #10050 `_, for example. Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We'd like to scale Dataverse to better handle large number of files or versions (`#9763 `_). +We'd like to scale Dataverse to better handle large number of files or versions. Progress was made in `PR #9883 `_. Withstanding Bots ~~~~~~~~~~~~~~~~~ @@ -183,7 +183,7 @@ Most likely there is training available that is oriented toward performance. The Learn from the Community How They Monitor Performance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings (, but we could dedicate time to this topic at our annual meeting or community calls. +Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings, but we could dedicate time to this topic at our annual meeting or community calls. Teach the Community to Do Performance Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 3d6343eca2846edca97e4d9699f3305fb7c19c62 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 16:09:46 -0500 Subject: [PATCH 392/546] mention configurable docroot #10151 --- doc/release-notes/6.1-release-notes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 1e09a207104..1279d09a023 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -195,6 +195,7 @@ to generate updated versions. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. 
This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. +- It's now possible to configure the docroot, which holds collection logos and more. See [dataverse.files.docroot](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-docroot) in the Installation Guide and PR #9819. - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. @@ -213,6 +214,7 @@ See also #10060. - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.name +- dataverse.files.docroot - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request - dataverse.files.{driverId}.upload-out-of-band From fa32ef5a413f6b0fbfab7d6e96e602a31bc18ac4 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:36:52 +0000 Subject: [PATCH 393/546] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 99438520120..1e86f24356b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2686,7 +2686,7 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" -Know if a User can download at least one File from a Dataset Version +Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This API call allows to know if the calling user can download at least one file of a dataset version. From 476977b48925ae6eae4dabf69b0de0d7d40d6841 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:01 +0000 Subject: [PATCH 394/546] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1e86f24356b..9ceeb4410ef 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2689,7 +2689,7 @@ In particular, the user permissions that this API call checks, returned as boole Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This API call allows to know if the calling user can download at least one file of a dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. .. 
code-block:: bash From 64861afbc11c4475ca3d85e729f4b73e962d5efa Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:36 +0000 Subject: [PATCH 395/546] Update doc/release-notes/10155-datasets-can-download-at-least-one-file.md Co-authored-by: Philip Durbin --- .../10155-datasets-can-download-at-least-one-file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md index 566d505f7ca..a0b0d02310a 100644 --- a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md +++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md @@ -1,3 +1,3 @@ The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created. -This endpoint allows to know if the calling user can download at least one file of a particular dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. From 39e4bcee0f164854301b45f0ba6cbd4e11b4cf5c Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:42:46 +0000 Subject: [PATCH 396/546] Fixed: minio storage volume mapping --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5265a6b7c2d..6f8decc0dfb 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -209,7 +209,7 @@ services: networks: - dataverse volumes: - - minio_storage:/data + - ./docker-dev-volumes/minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y From 0c279adc3e93bd09bedc08a3f1bda48876fc1de3 Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:50:08 +0000 Subject: [PATCH 397/546] Removed: sleep calls from testGetCanDownloadAtLeastOneFile IT --- .../java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b2cf5c75467..f36b93b85ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4225,7 +4225,7 @@ public void testGetGlobusUploadParameters() { } @Test - public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + public void testGetCanDownloadAtLeastOneFile() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); @@ -4252,9 +4252,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Create a second user to call the getCanDownloadAtLeastOneFile method Response createSecondUserResponse = UtilIT.createRandomUser(); createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -4275,9 +4272,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", 
apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Call with a valid dataset id when a file is restricted and the user does not have access canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); From 960a20c79dc8a3292ff3d26973d8e35d8a4f481c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 14:06:21 -0500 Subject: [PATCH 398/546] #10168 fix error response status --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b3bfc476423..05355cbbc68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4288,7 +4288,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext headers); if (dsv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); + return error(Status.NOT_FOUND, "This dataset version has not been archived"); } else { JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation()); return ok(status); From 40e5d39c73ec2097fb16d65e8fff33078168498b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 12 Dec 2023 14:53:45 -0500 Subject: [PATCH 399/546] how to test Docker images made during a release --- .../source/developers/making-releases.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 23c4773a06e..432b4ca2672 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -67,6 +67,19 @@ Once important tests have passed (compile, unit tests, etc.), merge the pull req If this is a hotfix release, skip this whole "merge develop to master" step (the "develop" branch is not involved until later). +(Optional) Test Docker Images +----------------------------- + +After the "master" branch has been updated and the GitHub Action to build and push Docker images has run (see `PR #9776 `_), go to https://hub.docker.com/u/gdcc and make sure the "alpha" tag for the following images has been updated: + +- https://hub.docker.com/r/gdcc/base +- https://hub.docker.com/r/gdcc/dataverse +- https://hub.docker.com/r/gdcc/configbaker + +To test these images against our API test suite, go to the "alpha" workflow at https://github.com/gdcc/api-test-runner/actions/workflows/alpha.yml and run it. + +If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. 
+ Build the Guides for the Release -------------------------------- From a240bd0fa81cc4a9db0cc9c8ddb37ad733324fcd Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 15:20:07 -0500 Subject: [PATCH 400/546] bump htmlunit to 3.9.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 34b0ad2e835..d690e509f46 100644 --- a/pom.xml +++ b/pom.xml @@ -650,7 +650,7 @@ org.htmlunit htmlunit - 3.2.0 + 3.9.0 test From b1f15bb95ff58dd62c7aaa1a2ababa1f44b83881 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 15:30:54 -0500 Subject: [PATCH 401/546] bump DuraCloud to 8.0.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 34b0ad2e835..be4fa605aab 100644 --- a/pom.xml +++ b/pom.xml @@ -466,7 +466,7 @@ org.duracloud common - 7.1.1 + 8.0.0 org.slf4j @@ -481,7 +481,7 @@ org.duracloud storeclient - 7.1.1 + 8.0.0 org.slf4j From daf89261174600b1db106974cc941213fa0b36bd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 15:37:27 -0500 Subject: [PATCH 402/546] #10168 update integration tests --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 928574eb82b..7efd44b9533 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3291,7 +3291,8 @@ public void testArchivalStatusAPI() throws IOException { //Verify the status is empty Response nullStatus = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus.prettyPrint(); + nullStatus.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); //Set it Response setStatus = UtilIT.setDatasetVersionArchivalStatus(datasetId, "1.0", apiToken, "pending", @@ -3309,7 +3310,7 @@ public void testArchivalStatusAPI() throws IOException { //Make sure it's gone Response nullStatus2 = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus2.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus2.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); } From 2ce0fb8f083ef8dfedfb71feea0d58ff2f9c7647 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 16:06:52 -0500 Subject: [PATCH 403/546] bump google.cloud.version to 0.209.0 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 7b305cad581..25d714b39ed 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -152,7 +152,7 @@ 42.6.0 9.3.0 1.12.290 - 0.177.0 + 0.209.0 8.0.0 From 349f7dbcaaaf260c00126567f9f4c6d32b0c367c Mon Sep 17 00:00:00 2001 From: sbondka Date: Wed, 13 Dec 2023 15:31:31 +0100 Subject: [PATCH 404/546] Add presentation link --- doc/sphinx-guides/source/admin/integrations.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index ed3860a9ca1..53a663b942e 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -193,7 +193,7 @@ This connector facilitates seamless two-way transfer of datasets and 
files, emph It is a lightweight client-side web application built using React and relying on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems. Currently, it supports small to medium-sized files, with plans to enable support for large files and signed Dataverse endpoints in the future. What kind of user is the feature intended for? -The feature is intended for researchers, scientists and data analyst who are working with Dataverse instances and JupyterHub looking to ease the data transfer process. +The feature is intended for researchers, scientists and data analyst who are working with Dataverse instances and JupyterHub looking to ease the data transfer process. See `presentation `_ for details. .. _integrations-discovery: From ea644b89a3149ff8599fe3fcaa3a2bf6f5804e71 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 13 Dec 2023 14:16:47 -0500 Subject: [PATCH 405/546] add "message sent" success message #2638 --- src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java | 2 ++ src/main/java/propertyFiles/Bundle.properties | 1 + src/main/webapp/contactFormFragment.xhtml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java index 6be768321c4..68912969003 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java +++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.Optional; @@ -217,6 +218,7 @@ public String sendMessage() { } logger.fine("sending feedback: " + feedback); mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent")); return null; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 10576c0c116..0c6ce979a94 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -184,6 +184,7 @@ contact.context.file.intro={0}\n\nYou have just been sent the following message contact.context.file.ending=\n\n---\n\n{0}\n{1}\n\nGo to file {2}/file.xhtml?fileId={3}\n\nYou received this email because you have been listed as a contact for the dataset. If you believe this was an error, please contact {4} at {5}. To respond directly to the individual who sent the message, simply reply to this email. contact.context.support.intro={0},\n\nThe following message was sent from {1}.\n\n---\n\n contact.context.support.ending=\n\n---\n\nMessage sent from Support contact form. +contact.sent=Message sent. # dataverseuser.xhtml account.info=Account Information diff --git a/src/main/webapp/contactFormFragment.xhtml b/src/main/webapp/contactFormFragment.xhtml index cb4eb3d0872..8950ec5acf8 100644 --- a/src/main/webapp/contactFormFragment.xhtml +++ b/src/main/webapp/contactFormFragment.xhtml @@ -81,7 +81,7 @@
    + update="@form,messagePanel" oncomplete="if (args && !args.validationFailed) PF('contactForm').hide();" actionListener="#{sendFeedbackDialog.sendMessage}">
    From 057d1b926513a4716737a4b766a8fb46e709d44e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 09:05:21 -0500 Subject: [PATCH 436/546] add docker compose config to get HarvestingServerIT to pass #9275 --- docker-compose-dev.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6f8decc0dfb..ce9f39a418a 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -19,6 +19,9 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" + # to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: "2" + dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem From 37d3d41a51867758cac611215f830ad2af1d31a1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 09:11:41 -0500 Subject: [PATCH 437/546] assert 500 error when invalid query params are passed #9275 --- .../harvard/iq/dataverse/api/HarvestingServerIT.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index e02964ef28f..07788eca6db 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -860,7 +860,16 @@ public void testMultiRecordOaiSet() throws InterruptedException { logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); assertEquals(200, deleteResponse.getStatusCode(), "Failed to delete the control multi-record set"); } - + + @Test + public void testInvalidQueryParams() { + // "foo" is not a valid verb + String oaiVerbPath = "/oai?foo=bar"; + Response identifyResponse = given().get(oaiVerbPath); + // TODO Why is this 500? https://github.com/IQSS/dataverse/issues/9275 + identifyResponse.then().assertThat().statusCode(500); + } + // TODO: // What else can we test? 
// Some ideas: From 2ab5ba99a357fa88f44fe72201f827cb26cff448 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 4 Jan 2024 10:50:15 -0500 Subject: [PATCH 438/546] #9686 update migration script --- ...gclient-id.sql => V6.1.0.1__9686-move-harvestingclient-id.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.5__9686-move-harvestingclient-id.sql => V6.1.0.1__9686-move-harvestingclient-id.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.5__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.5__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql From 27fa15458cf9d68192a3e0eed53f43371990de8e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 16:21:16 -0500 Subject: [PATCH 439/546] show errors (in XML) for verb params #9275 --- .../9275-harvest-invalid-query-params.md | 4 +++ .../server/web/servlet/OAIServlet.java | 18 ++++++++-- .../iq/dataverse/api/HarvestingServerIT.java | 34 ++++++++++++++++--- 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 doc/release-notes/9275-harvest-invalid-query-params.md diff --git a/doc/release-notes/9275-harvest-invalid-query-params.md b/doc/release-notes/9275-harvest-invalid-query-params.md new file mode 100644 index 00000000000..33d7c7bac13 --- /dev/null +++ b/doc/release-notes/9275-harvest-invalid-query-params.md @@ -0,0 +1,4 @@ +OAI-PMH error handling has been improved to display a machine-readable error in XML rather than a 500 error with no further information. + +- /oai?foo=bar will show "No argument 'verb' found" +- /oai?verb=foo&verb=bar will show "Verb must be singular, given: '[foo, bar]'" diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 96a19acc0e8..34152a2d8bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,8 +31,11 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import io.gdcc.xoai.exceptions.BadArgumentException; +import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; +import io.gdcc.xoai.model.oaipmh.verbs.Verb; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; @@ -256,9 +259,18 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe "Sorry. OAI Service is disabled on this Dataverse node."); return; } - - RawRequest rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); - + + RawRequest rawRequest = null; + try { + rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); + } catch (BadVerbException bve) { + // Verb.Type is required. Hard-code one. + rawRequest = new RawRequest(Verb.Type.Identify); + // Ideally, withError would accept a BadVerbException. 
+ BadArgumentException bae = new BadArgumentException(bve.getLocalizedMessage()); + rawRequest.withError(bae); + } + OAIPMH handle = dataProvider.handle(rawRequest); response.setContentType("text/xml;charset=UTF-8"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 07788eca6db..3936a240826 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -863,11 +863,35 @@ public void testMultiRecordOaiSet() throws InterruptedException { @Test public void testInvalidQueryParams() { - // "foo" is not a valid verb - String oaiVerbPath = "/oai?foo=bar"; - Response identifyResponse = given().get(oaiVerbPath); - // TODO Why is this 500? https://github.com/IQSS/dataverse/issues/9275 - identifyResponse.then().assertThat().statusCode(500); + + // The query parameter "verb" must appear. + Response noVerbArg = given().get("/oai?foo=bar"); + noVerbArg.prettyPrint(); + noVerbArg.then().assertThat() + .statusCode(OK.getStatusCode()) + // This should be "badVerb" + .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error", equalTo("No argument 'verb' found")); + + // The query parameter "verb" cannot appear more than once. + Response repeated = given().get( "/oai?verb=foo&verb=bar"); + repeated.prettyPrint(); + repeated.then().assertThat() + .statusCode(OK.getStatusCode()) + // This should be "badVerb" + .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error", equalTo("Verb must be singular, given: '[foo, bar]'")); + + } + + @Test + public void testNoSuchSetError() { + Response noSuchSet = given().get("/oai?verb=ListIdentifiers&set=census&metadataPrefix=dc"); + noSuchSet.prettyPrint(); + noSuchSet.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("oai.error.@code", equalTo("noRecordsMatch")) + .body("oai.error", equalTo("Requested set 'census' does not exist")); } // TODO: From 6db3e3b9c64a0163c52b3cf988669d9bfd3a919f Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 4 Jan 2024 16:42:16 -0500 Subject: [PATCH 440/546] Fix for "latest" dataset version --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- .../impl/GetLatestAccessibleDatasetVersionCommand.java | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 094f2b88c92..83b1a4e861b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2796,7 +2796,7 @@ private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String @Override public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned); + return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java index 96e8ee73a50..7bcc851bde2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java @@ -25,15 
+25,17 @@ public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; + private boolean checkPerms; public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { - this(aRequest, anAffectedDataset, false); + this(aRequest, anAffectedDataset, false, false); } - public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) { + public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; + this.checkPerms = checkPerms; } @Override @@ -41,6 +43,6 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { if (ds.getLatestVersion().isDraft() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) { return ctxt.engine().submit(new GetDraftDatasetVersionCommand(getRequest(), ds)); } - return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, true)); + return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, checkPerms)); } } From d017bf6843189a0228ff1be229614ba7685fcf0b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 5 Jan 2024 11:48:00 -0500 Subject: [PATCH 441/546] #9686 assign harvest client id to harvested files --- .../harvard/iq/dataverse/api/imports/ImportServiceBean.java | 5 +++++ .../harvest/client/HarvestingClientServiceBean.java | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index c17ba909230..c5812403f31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -332,6 +332,11 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString()); + //adding the harvesting client id to harvested files #9686 + for (DataFile df : ds.getFiles()){ + df.setHarvestedFrom(harvestingClient); + } + if (existingDs != null) { // If this dataset already exists IN ANOTHER DATAVERSE // we are just going to skip it! 
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 7ec6d75a41c..5747c64d217 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -199,8 +199,8 @@ public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCoun public Long getNumberOfHarvestedDatasetsByAllClients() { try { - return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " - + " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult(); + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dvobject d " + + " WHERE d.harvestingclient_id IS NOT NULL and d.dtype = 'Dataset'").getSingleResult(); } catch (Exception ex) { logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage()); From e085ca926274a4688faeb61f842c319ffc41b538 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 5 Jan 2024 15:27:06 -0500 Subject: [PATCH 442/546] Adds test to cover latest, latest published and specific scenarios. --- .../harvard/iq/dataverse/api/DatasetsIT.java | 302 +++++++++++++++--- 1 file changed, 249 insertions(+), 53 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 200cfbaf1ff..9ac05ce5704 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -70,6 +70,7 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.contains; @@ -613,6 +614,7 @@ public void testCreatePublishDestroyDataset() { */ @Test public void testDatasetVersionsAPI() { + // Create user String apiToken = UtilIT.createRandomUserGetToken(); @@ -650,6 +652,11 @@ public void testDatasetVersionsAPI() { .statusCode(OK.getStatusCode()) .body("data.files", equalTo(null)); + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + excludeFiles = false; unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, excludeFiles, false); unpublishedDraft.prettyPrint(); @@ -657,7 +664,11 @@ public void testDatasetVersionsAPI() { .statusCode(OK.getStatusCode()) .body("data.files.size()", equalTo(1)); - + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + // Publish collection and dataset UtilIT.publishDataverseViaNativeApi(collectionAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); @@ -680,7 +691,8 @@ public void testDatasetVersionsAPI() { .body("data.size()", equalTo(2)) .body("data[0].files.size()", equalTo(2)) .body("data[1].files.size()", equalTo(1)); - + + // Now call this api with the new (as of 6.1) pagination parameters Integer offset = 0; Integer 
howmany = 1; @@ -690,15 +702,16 @@ public void testDatasetVersionsAPI() { versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.size()", equalTo(1)) + .body("data.versionState[0]", equalTo("DRAFT")) .body("data[0].files.size()", equalTo(2)); // And now call it with an un-privileged token, to make sure only one - // (the published) version is shown: - + // (the published) version is shown: versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms); versionsResponse.prettyPrint(); versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) + .body("data.versionState[0]", not("DRAFT")) .body("data.size()", equalTo(1)); // And now call the "short", no-files version of the same api @@ -711,35 +724,98 @@ public void testDatasetVersionsAPI() { - //Set of tests on non-deaccesioned dataset - - boolean includeDeaccessioned = true; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - - excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); + //Set of tests on non-deaccesioned dataset + String specificVersion = "1.0"; + boolean includeDeaccessioned = false; + Response datasetVersion = null; - includeDeaccessioned = false; excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); + //Latest published authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Latest published unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Latest authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Specific version unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - + //Latest published authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Latest published unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Latest authorized token, user is authenticated should get the Draft version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token, user has no permissions should get the latest Published version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Specific version unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + //We deaccession the dataset Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -747,38 +823,158 @@ public void testDatasetVersionsAPI() { //Set of tests on deaccesioned dataset, only 3/9 should return OK message includeDeaccessioned = true; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1));; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - - includeDeaccessioned = false; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - + //Latest published authorized token with deaccessioned dataset + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files.size()", equalTo(1)); + + //Latest published requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files.size()", equalTo(1)); + + //Specific version unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets. 
+ datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + excludeFiles = true; + //Latest published exclude files authorized token with deaccessioned dataset + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Latest published exclude files, should get the DEACCESSIONED version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Latest authorized token should get the DRAFT version with no files + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest unauthorized token excluding files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Specific version unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets. 
+ datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Set of test when we have a deaccessioned dataset but we don't include deaccessioned + includeDeaccessioned = false; + excludeFiles = false; + //Latest published authorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest published unauthorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version unauthorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); - - + excludeFiles = true; - + //Latest published authorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest published unauthorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest 
unauthorized token one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version unauthorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + } From 4db74b6e5ddd3cf7f2ee49b94b9b229e2746bd35 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 5 Jan 2024 16:20:27 -0500 Subject: [PATCH 443/546] how to write release note snippets #9264 --- .../source/developers/making-releases.rst | 10 ++-- .../source/developers/version-control.rst | 54 ++++++++++++++++--- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index e73811a77e1..6b94282d55e 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -14,16 +14,18 @@ See :doc:`version-control` for background on our branching strategy. The steps below describe making both regular releases and hotfix releases. +.. _write-release-notes: + Write Release Notes ------------------- -Developers express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. +Developers express the need for an addition to release notes by creating a "release note snippet" in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. See :ref:`writing-release-note-snippets` for how this is described for contributors. -The task at or near release time is to collect these notes into a single doc. +The task at or near release time is to collect these snippets into a single file. - Create an issue in GitHub to track the work of creating release notes for the upcoming release. -- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above. -- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file. +- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. +- Delete the release note snippets as the content is added to the main release notes file. 
- Include instructions to describe the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. - Take the release notes .md through the regular Code Review and QA process. diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 91f59c76e61..12f3d5b81fd 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -65,23 +65,65 @@ The example of creating a pull request below has to do with fixing an important Find or Create a GitHub Issue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Issue is a bug (unexpected behavior) or a new feature in Dataverse, to know how to find or create an issue in dataverse please see https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md +An issue represents a bug (unexpected behavior) or a new feature in Dataverse. We'll use the issue number in the branch we create for our pull request. -For guidance on which issue to work on, please ask! with email to support@dataverse.org +Finding GitHub Issues to Work On +******************************** -Let's say you want to tackle https://github.com/IQSS/dataverse/issues/3728 which points out a typo in a page of the Dataverse Software's documentation. +Assuming this is your first contribution to Dataverse, you should start with something small. The following issue labels might be helpful in your search: + +- `good first issue `_ (these appear at https://github.com/IQSS/dataverse/contribute ) +- `hacktoberfest `_ +- `Help Wanted: Code `_ +- `Help Wanted: Documentation `_ + +For guidance on which issue to work on, please ask! :ref:`getting-help-developers` explains how to get in touch. + +Creating GitHub Issues to Work On +********************************* + +You are very welcome to create a GitHub issue to work on. However, for significant changes, please reach out (see :ref:`getting-help-developers`) to make sure the team and community agree with the proposed change. + +For small changes and especially typo fixes, please don't worry about reaching out first. + +Communicate Which Issue You Are Working On +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the issue you can simply leave a comment to say you're working on it. If you tell us your GitHub username we are happy to add you to the "read only" team at https://github.com/orgs/IQSS/teams/dataverse-readonly/members so that we can assign the issue to you while you're working on it. You can also tell us if you'd like to be added to the `Dataverse Community Contributors spreadsheet `_. -Create a New Branch off the develop Branch +Create a New Branch Off the develop Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing, and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. +Always create your feature branch from the latest code in develop, pulling the latest code if necessary. 
As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 `_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. Commit Your Change to Your New Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Making a commit (or several commits) to that branch, enter a description of the changes you have made. Ideally the first line of your commit message includes the number of the issue you are addressing, such as ``Fixed BlockedApiPolicy #3728``. +For each commit to that branch, try to include the issue number along with a summary in the first line of the commit message, such as ``Fixed BlockedApiPolicy #3728``. You are welcome to write longer descriptions in the body as well! + +.. _writing-release-note-snippets: + +Writing a Release Note Snippet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We highly value your insight as a contributor when it comes to describing your work in our release notes. Not every pull request will be mentioned in release notes but most are. + +As described at :ref:`write-release-notes`, at release time we compile together release note "snippets" into the final release notes. + +Here's how to add a release note snippet to your pull request: + +- Create a Markdown file under ``doc/release-notes``. You can reuse the name of your branch and append ".md" to it, e.g. ``3728-doc-apipolicy-fix.md`` +- Edit the snippet to include anything you think should be mentioned in the release notes, such as: + + - Descriptions of new features + - Explanations of bugs fixed + - New configuration settings + - Upgrade instructions + - Etc. + +Release note snippets do not need to be long. For a new feature, a single line description might be enough. Please note that your release note will likely be edited (expanded or shortened) when the final release notes are being created.
Push Your Branch to GitHub ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 826d4bdcd2d0418c8d65c8409107de0d66f6dd19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Jan 2024 17:46:26 -0500 Subject: [PATCH 444/546] per QA --- doc/sphinx-guides/source/developers/globus-api.rst | 1 + .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index de9df06a798..2f922fb1fc0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -2,6 +2,7 @@ Globus Transfer API =================== The Globus API addresses three use cases: + * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 61884045f35..3e60441850b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -240,7 +240,7 @@ private int makeDir(GlobusEndpoint endpoint, String dir) { MakeRequestResponse result = null; String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}"; try { - logger.info(body); + logger.fine(body); URL url = new URL( "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body); From dbab6ca9269a93bd7d292b37b00c42dc0fbad55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Jan 2024 10:30:25 -0500 Subject: [PATCH 445/546] use name@email.xyz to match citation block #2638 From datasetfieldtype.datasetContactEmail.watermark --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b1c38e52496..ece3f070cdd 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -155,7 +155,7 @@ contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. -contact.from.emailPlaceholder=valid@email.org +contact.from.emailPlaceholder=name@email.xyz contact.subject=Subject contact.subject.required=Subject is required. contact.subject.selectTab.top=Select subject... From 88af3d4ed1316df681ce53fc0d4c00d03ac56e7d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Jan 2024 12:16:51 -0500 Subject: [PATCH 446/546] clean up error handling #9275 dataProvider.handle(params) allows us to return the correct error. 
--- .../harvest/server/web/servlet/OAIServlet.java | 16 ++++++---------- .../iq/dataverse/api/HarvestingServerIT.java | 6 ++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 34152a2d8bd..233ca94f5fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,11 +31,9 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import io.gdcc.xoai.exceptions.BadArgumentException; import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; -import io.gdcc.xoai.model.oaipmh.verbs.Verb; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; @@ -51,6 +49,7 @@ import jakarta.servlet.http.HttpServlet; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; +import java.util.Map; import javax.xml.stream.XMLStreamException; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -260,18 +259,15 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe return; } - RawRequest rawRequest = null; + Map params = httpServletRequest.getParameterMap(); + OAIPMH handle; try { - rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); + RawRequest rawRequest = RequestBuilder.buildRawRequest(params); + handle = dataProvider.handle(rawRequest); } catch (BadVerbException bve) { - // Verb.Type is required. Hard-code one. - rawRequest = new RawRequest(Verb.Type.Identify); - // Ideally, withError would accept a BadVerbException. - BadArgumentException bae = new BadArgumentException(bve.getLocalizedMessage()); - rawRequest.withError(bae); + handle = dataProvider.handle(params); } - OAIPMH handle = dataProvider.handle(rawRequest); response.setContentType("text/xml;charset=UTF-8"); try (XmlWriter xmlWriter = new XmlWriter(response.getOutputStream(), repositoryConfiguration);) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 3936a240826..45dd0c08226 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -869,8 +869,7 @@ public void testInvalidQueryParams() { noVerbArg.prettyPrint(); noVerbArg.then().assertThat() .statusCode(OK.getStatusCode()) - // This should be "badVerb" - .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error.@code", equalTo("badVerb")) .body("oai.error", equalTo("No argument 'verb' found")); // The query parameter "verb" cannot appear more than once. 
@@ -878,8 +877,7 @@ public void testInvalidQueryParams() { repeated.prettyPrint(); repeated.then().assertThat() .statusCode(OK.getStatusCode()) - // This should be "badVerb" - .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error.@code", equalTo("badVerb")) .body("oai.error", equalTo("Verb must be singular, given: '[foo, bar]'")); } From 2b1e5dd4bda6788f644c2737cf56310e7eaefb7d Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:10:58 -0500 Subject: [PATCH 447/546] Extend getVersionFiles API endpoint to include the total file count --- .../iq/dataverse/api/AbstractApiBean.java | 64 +++----- .../harvard/iq/dataverse/api/Datasets.java | 146 +++++------------- .../harvard/iq/dataverse/api/DatasetsIT.java | 98 ++++++------ 3 files changed, 108 insertions(+), 200 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 58565bcc9d6..2a2843c0494 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -1,29 +1,6 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingDataverse; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -40,8 +17,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; +import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -51,33 +28,30 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; -import 
java.io.InputStream; -import java.net.URI; -import java.util.Arrays; -import java.util.Collections; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonException; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; import jakarta.persistence.PersistenceContext; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response.ResponseBuilder; import jakarta.ws.rs.core.Response.Status; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -661,7 +635,13 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } - + protected Response ok( JsonArrayBuilder bld , long totalCount) { + return Response.ok(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("total_count", totalCount) + .add("data", bld).build()) + .type(MediaType.APPLICATION_JSON).build(); + } protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 094f2b88c92..56b9e8df319 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.PartETag; import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +15,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -23,92 +26,47 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import 
edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; +import edu.harvard.iq.dataverse.makedatacount.*; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.ArchiverUtil; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.*; import edu.harvard.iq.dataverse.util.bagit.OREMap; -import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.util.SignpostingResources; -import edu.harvard.iq.dataverse.search.IndexServiceBean; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.util.json.*; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext; -import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; -import edu.harvard.iq.dataverse.globus.GlobusUtil; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.inject.Inject; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.ws.rs.*; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Response.Status; +import 
org.apache.commons.lang3.StringUtils; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; + import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -117,45 +75,21 @@ import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.LocalDateTime; -import java.util.*; -import java.util.concurrent.*; -import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.*; import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJB; -import jakarta.ejb.EJBException; -import jakarta.inject.Inject; -import jakarta.json.*; -import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.DefaultValue; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.NotAcceptableException; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; -import jakarta.ws.rs.core.Response.Status; + +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.settings.JvmSettings; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -546,7 +480,9 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria))); + // TODO: should we count the total every time or only when offset = 0? 
+ return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), + datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 200cfbaf1ff..ace69a6c606 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1,77 +1,66 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean; import edu.harvard.iq.dataverse.FileSearchCriteria; -import io.restassured.RestAssured; -import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static io.restassured.RestAssured.given; -import io.restassured.path.json.JsonPath; -import io.restassured.http.ContentType; -import io.restassured.response.Response; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.logging.Logger; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.skyscreamer.jsonassert.JSONAssert; -import org.junit.jupiter.api.Disabled; -import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; -import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import static jakarta.ws.rs.core.Response.Status.METHOD_NOT_ALLOWED; -import static jakarta.ws.rs.core.Response.Status.CONFLICT; -import static jakarta.ws.rs.core.Response.Status.NO_CONTENT; -import edu.harvard.iq.dataverse.DataFile; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import edu.harvard.iq.dataverse.authorization.DataverseRole; +import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; -import io.restassured.parsing.Parser; -import static io.restassured.path.json.JsonPath.with; -import io.restassured.path.xml.XmlPath; -import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; -import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.datavariable.VariableMetadata; import edu.harvard.iq.dataverse.datavariable.VariableMetadataDDIParser; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; 
-import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.Files; +import io.restassured.RestAssured; +import io.restassured.http.ContentType; +import io.restassured.parsing.Parser; +import io.restassured.path.json.JsonPath; +import io.restassured.path.xml.XmlPath; +import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.hamcrest.CoreMatchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.with; +import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; -import org.hamcrest.CoreMatchers; -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.hasItems; -import static org.hamcrest.CoreMatchers.startsWith; -import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.Matchers.contains; import static org.junit.jupiter.api.Assertions.*; @@ -3548,7 +3537,9 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) - .body("data[1].label", equalTo(testFileName2)); + .body("data[1].label", equalTo(testFileName2)) + .body("total_count", equalTo(5)); + String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3562,7 +3553,8 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) - .body("data[1].label", equalTo(testFileName4)); + .body("data[1].label", equalTo(testFileName4)) + .body("total_count", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 0807b1fd64b076ef92029a16b1c3a946802c56b7 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:18:55 
-0500 Subject: [PATCH 448/546] fix format --- src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 2a2843c0494..419132f7ba7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -635,6 +635,7 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) @@ -642,6 +643,7 @@ protected Response ok( JsonArrayBuilder bld , long totalCount) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) From 53e525d7ddddcc4fd055f45debc126f8b2340ffc Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:24:21 -0500 Subject: [PATCH 449/546] fix format --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index ace69a6c606..91aa33f6b1f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3539,7 +3539,6 @@ public void getVersionFiles() throws IOException, InterruptedException { .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) .body("total_count", equalTo(5)); - String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 622a676681a336fd78e89d1f6d21e3e703eb7d7a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 9 Jan 2024 10:32:12 -0500 Subject: [PATCH 450/546] updated per review comments --- ...-extend-getVersionFiles-api-to-include-total-file-count.md | 2 ++ doc/sphinx-guides/source/api/native-api.rst | 4 +++- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 - src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md diff --git a/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md new file mode 100644 index 00000000000..80a71e9bb7e --- /dev/null +++ b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md @@ -0,0 +1,2 @@ +The response for getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) endpoint has been modified to include a total count of records available (totalCount:x). +This will aid in pagination by allowing the caller to know how many pages can be iterated through. The existing API (getVersionFileCounts) to return the count will still be available. 
\ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6591c983824..48fc16bf141 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1066,7 +1066,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" -This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters: +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters. +To aid in pagination the Json response also includes the total number of rows (totalCount) available. +Usage example: .. code-block:: bash diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 419132f7ba7..bc94d7f0bcc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -639,7 +639,7 @@ protected Response ok( JsonArrayBuilder bld ) { protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) - .add("total_count", totalCount) + .add("totalCount", totalCount) .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 56b9e8df319..3a2497d9418 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -480,7 +480,6 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - // TODO: should we count the total every time or only when offset = 0? 
return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 91aa33f6b1f..5753550d564 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3538,7 +3538,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3553,7 +3553,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) .body("data[1].label", equalTo(testFileName4)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 291811e3e3c6f0f8c54dcd6b980444259e247d70 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 9 Jan 2024 11:42:34 -0500 Subject: [PATCH 451/546] #9686 add migration to harvested files --- .../migration/V6.1.0.1__9686-move-harvestingclient-id.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql index 22142b8fc41..67ba026745f 100644 --- a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql +++ b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql @@ -1,8 +1,14 @@ ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; +--add harvesting client id to dvobject records of harvested datasets update dvobject dvo set harvestingclient_id = s.harvestingclient_id from (select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s where s.id = dvo.id; +--add harvesting client id to dvobject records of harvested files +update dvobject dvo set harvestingclient_id = s.harvestingclient_id from +(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s +where s.id = dvo.owner_id; + ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; From dfb1795e1318d058c4b614894ce9cd1039da38d3 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 9 Jan 2024 17:37:06 +0000 Subject: [PATCH 452/546] Added: minor docs formatting tweaks --- doc/sphinx-guides/source/api/native-api.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 48fc16bf141..09fc3c69693 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1067,7 +1067,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" This endpoint supports optional pagination, through the ``limit`` and 
``offset`` query parameters. -To aid in pagination the Json response also includes the total number of rows (totalCount) available. + +To aid in pagination the JSON response also includes the total number of rows (totalCount) available. + Usage example: .. code-block:: bash From 03f4a06b5ed163d9252e6e868fa2e939fda0a2e0 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 9 Jan 2024 13:30:34 -0500 Subject: [PATCH 453/546] #9686 add a release note --- doc/release-notes/9686-move-harvesting-client-id.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9686-move-harvesting-client-id.md diff --git a/doc/release-notes/9686-move-harvesting-client-id.md b/doc/release-notes/9686-move-harvesting-client-id.md new file mode 100644 index 00000000000..110fcc6ca6e --- /dev/null +++ b/doc/release-notes/9686-move-harvesting-client-id.md @@ -0,0 +1 @@ +With this release the harvesting client id will be available for harvested files. A database update will copy the id to previously harvested files./ From b9bcf995b42889af3333368b3264f49264df52ef Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Tue, 9 Jan 2024 14:58:32 -0500 Subject: [PATCH 454/546] Update Kanban Board URL The URL was pointing to the old board. --- doc/sphinx-guides/source/developers/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index a01a8066897..f446b73de09 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -40,7 +40,7 @@ For the Dataverse Software development roadmap, please see https://www.iq.harvar Kanban Board ------------ -You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/2 +You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/34 Issue Tracker ------------- From 94570f0c670e6d39594c5cfb9ca5233962834de0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 10 Jan 2024 10:59:21 -0500 Subject: [PATCH 455/546] add toc to docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2f922fb1fc0..b5d420467aa 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -1,6 +1,9 @@ Globus Transfer API =================== +.. contents:: |toctitle| + :local: + The Globus API addresses three use cases: * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) From 67292840e9b6e2f701fd6bc0e09522b0b2d0ef07 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Wed, 10 Jan 2024 13:16:27 -0500 Subject: [PATCH 456/546] Add comments and makes the loop easier to understand. 
--- ...tLatestPublishedDatasetVersionCommand.java | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index a4952bbf524..dd9a8112afe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -17,33 +17,51 @@ public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; - private boolean checkPerms; + private boolean checkPermsWhenDeaccessioned; public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { this(aRequest, anAffectedDataset, false, false); } - public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) { + public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; - this.checkPerms = checkPerms; + this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned; } + /* + * This command depending on the requested parameters will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. + * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. + * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - for (DatasetVersion dsv : ds.getVersions()) { - if (dsv.isReleased() || (includeDeaccessioned && dsv.isDeaccessioned())) { - - if(dsv.isDeaccessioned() && checkPerms){ - if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ - return null; - } - } - return dsv; + DatasetVersion dsv = null; + + //We search of a released or deaccessioned version if it is requested. + for (DatasetVersion next : ds.getVersions()) { + if (next.isReleased() || (includeDeaccessioned && next.isDeaccessioned())){ + dsv = next; + break; + } + } + + //Checking permissions if the deaccessionedVersion was found and we are checking permissions because files were requested. 
+ if(dsv != null && (dsv.isDeaccessioned() && checkPermsWhenDeaccessioned)){ + //If the user has no permissions we return null + if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ + dsv = null; } } - return null; + + return dsv; } + + + } From 9d18da511af71dd4daeb1f76c330c5a25dbcca23 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 11 Jan 2024 11:01:08 +0000 Subject: [PATCH 457/546] Added: displayOrder and isRequired fields to DatasetFieldType payload --- .../harvard/iq/dataverse/util/json/JsonPrinter.java | 2 ++ .../edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index cfc266f2ba7..a97ef9c12d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -570,6 +570,8 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("multiple", fld.isAllowMultiples()); fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); fieldsBld.add("displayFormat", fld.getDisplayFormat()); + fieldsBld.add("isRequired", fld.isRequired()); + fieldsBld.add("displayOrder", fld.getDisplayOrder()); if (fld.isControlledVocabulary()) { // If the field has a controlled vocabulary, // add all values to the resulting JSON diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index c301e158b4e..f1c3a9815f1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -25,7 +25,9 @@ void testGetCitationBlock() { getCitationBlock.prettyPrint(); getCitationBlock.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")); + .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")) + .body("data.fields.title.displayOrder", CoreMatchers.is(0)) + .body("data.fields.title.isRequired", CoreMatchers.is(true)); } @Test @@ -37,18 +39,18 @@ void testDatasetWithAllDefaultMetadata() { ", response=" + createUser.prettyPrint()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); assumeFalse(apiToken == null || apiToken.isBlank()); - + Response createCollection = UtilIT.createRandomDataverse(apiToken); assumeTrue(createCollection.statusCode() < 300, "code=" + createCollection.statusCode() + ", response=" + createCollection.prettyPrint()); String dataverseAlias = UtilIT.getAliasFromResponse(createCollection); assumeFalse(dataverseAlias == null || dataverseAlias.isBlank()); - + // when String pathToJsonFile = "scripts/api/data/dataset-create-new-all-default-fields.json"; Response createDataset = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); - + // then assertEquals(CREATED.getStatusCode(), createDataset.statusCode(), "code=" + createDataset.statusCode() + From e8054138219ffc499c756ee9d77bdb77d7450a23 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 11 Jan 2024 11:06:16 +0000 Subject: [PATCH 458/546] Added: release notes for #10216 --- doc/release-notes/10216-metadatablocks.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/release-notes/10216-metadatablocks.md diff --git 
a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md new file mode 100644 index 00000000000..8fbd4f37e14 --- /dev/null +++ b/doc/release-notes/10216-metadatablocks.md @@ -0,0 +1,4 @@ +The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: + +- `isRequired` - Wether or not this field is required +- `displayOrder`: The display order of the field in create/edit forms From 462d8f743ba96beb39a2d30ec49eb0ee3ae9d210 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 11 Jan 2024 10:17:18 -0500 Subject: [PATCH 459/546] #10216 typo in release note --- doc/release-notes/10216-metadatablocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md index 8fbd4f37e14..b3be7e76abc 100644 --- a/doc/release-notes/10216-metadatablocks.md +++ b/doc/release-notes/10216-metadatablocks.md @@ -1,4 +1,4 @@ The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: -- `isRequired` - Wether or not this field is required +- `isRequired` - Whether or not this field is required - `displayOrder`: The display order of the field in create/edit forms From b1bb6a047cc347a6d6c97ba9f56060d3805ec545 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:35:34 -0500 Subject: [PATCH 460/546] minor doc tweaks #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index b5d420467aa..96475f33230 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -72,7 +72,7 @@ The response includes the id for the Globus endpoint to use along with several s The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. -If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +If called for, a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are described further below. @@ -91,7 +91,7 @@ The returned response includes the same getDatasetMetadata and getFileListing UR Performing an Upload/Transfer In -------------------------------- -The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer (managed=true) or to reference files (managed=false). Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. 
These both reference the same API call but must be used with different entries in the JSON body sent: From 1c3162f01cb921b21a72042ea03b1e9ca94c6da9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:49:01 -0500 Subject: [PATCH 461/546] typo #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 96475f33230..57748d0afc9 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -170,7 +170,7 @@ In the managed case, once a Globus transfer has been initiated a final API call curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" -Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. From 8cc2e7c0e5ba16b2f380f8fd31531e1f90271c12 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:56:50 -0500 Subject: [PATCH 462/546] fix path in globus endpoint docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 57748d0afc9..a9cfe5aedff 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -102,7 +102,7 @@ Once the user identifies which files are to be added, the requestGlobusTransferP export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV export LOCALE=en-US - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUploadPaths" Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. 
From c3556e012a03b1e131146821faabb183b1a62a87 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 12:14:24 -0500 Subject: [PATCH 463/546] add missing trailing double quote #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index a9cfe5aedff..5a90243bd93 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -168,7 +168,7 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles" -F "jsonData=$JSON_DATA" Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. From 50425d3f6e063b7f54d5a49b7bcb758f0ffde3b6 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 14:20:03 -0500 Subject: [PATCH 464/546] only list the OAI sets that have associated records #3322 --- .../harvest/server/OAISetServiceBean.java | 20 +++++++++++++++++++ .../xoai/DataverseXoaiSetRepository.java | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java index 2bd666401c7..d5c78c36b98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java @@ -25,6 +25,7 @@ import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException; @@ -121,6 +122,25 @@ public List findAllNamedSets() { } } + /** + * "Active" sets are the ones that have been successfully exported, and contain + * a non-zero number of records. (Although a set that contains a number of + * records that are all marked as "deleted" is still an active set!) 
+ * @return list of OAISets + */ + public List findAllActiveNamedSets() { + String jpaQueryString = "select object(o) " + + "from OAISet as o, OAIRecord as r " + + "where r.setName = o.spec " + + "and o.spec != '' " + + "group by o order by o.spec"; + + Query query = em.createQuery(jpaQueryString); + List queryResults = query.getResultList(); + + return queryResults; + } + @Asynchronous public void remove(Long setId) { OAISet oaiSet = find(setId); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java index b4e275b6059..1e713b08adb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java @@ -35,7 +35,7 @@ public void setSetService(OAISetServiceBean setService) { @Override public boolean supportSets() { - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); if (dataverseOAISets == null || dataverseOAISets.isEmpty()) { return false; @@ -46,7 +46,7 @@ public boolean supportSets() { @Override public List getSets() { logger.fine("calling retrieveSets()"); - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); List XOAISets = new ArrayList(); if (dataverseOAISets != null) { From 15ad04ee96164806036a974dbe5bf41ea2a7f0fa Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 14:52:24 -0500 Subject: [PATCH 465/546] A test for the new "don't list until exported" OAI set feature (#3322) --- .../iq/dataverse/api/HarvestingServerIT.java | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index e02964ef28f..e0f121305e0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -288,7 +288,7 @@ public void testNativeSetAPI() { } @Test - public void testSetEditAPIandOAIlistSets() { + public void testSetEditAPIandOAIlistSets() throws InterruptedException { // This test focuses on testing the Edit functionality of the Dataverse // OAI Set API and the ListSets method of the Dataverse OAI server. @@ -299,7 +299,8 @@ public void testSetEditAPIandOAIlistSets() { // expected HTTP result codes. String setName = UtilIT.getRandomString(6); - String setDef = "*"; + String persistentId = extraDatasetsIdentifiers.get(0); + String setDef = "dsPersistentId:"+persistentId; // Make sure the set does not exist String setPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -369,16 +370,35 @@ public void testSetEditAPIandOAIlistSets() { XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); - // 2. Validate the payload of the response, by confirming that the set + // 2. The set hasn't been exported yet, so it shouldn't be listed in + // ListSets (#3322). Let's confirm that: + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + // 2a. 
Confirm that our set is listed: + assertNotNull(listSets, "Unexpected response from ListSets"); + assertEquals(0, listSets.size(), "An unexported OAI set is listed in ListSets"); + + // export the set: + + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); // sleep for a sec to be sure + + // ... try again: + + listSetsResponse = UtilIT.getOaiListSets(); + responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 3. Validate the payload of the response, by confirming that the set // we created and modified, above, is being listed by the OAI server // and its xml record is properly formatted - List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); - // 2a. Confirm that our set is listed: + // 3a. Confirm that our set is listed: assertNotNull(listSets, "Unexpected response from ListSets"); assertEquals(1, listSets.size(), "Newly-created set isn't properly listed by the OAI server"); - // 2b. Confirm that the set entry contains the updated description: + // 3b. Confirm that the set entry contains the updated description: assertEquals(newDescription, listSets.get(0).getPath("setDescription.metadata.element.field", String.class), "Incorrect description in the ListSets entry"); // ok, the xml record looks good! From 3a81926980edc7c8228dddf18a8f1305b32fc2c8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:40:14 -0500 Subject: [PATCH 466/546] add requestGlobusUploadPaths to UtilIT #10200 --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e29677c2252..33dda05b4d7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3718,4 +3718,12 @@ static Response requestGlobusDownload(Integer datasetId, JsonObject body, String .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); } + static Response requestGlobusUploadPaths(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body.toString()) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusUploadPaths"); + } + } From 83120012480ce12ef8db3d33d3a1c93c4605945a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:47:17 -0500 Subject: [PATCH 467/546] clarify where taskIdentifier comes from #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 5a90243bd93..834db8161f0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -157,7 +157,7 @@ In the remote/reference case, the map is from the initially supplied endpoint/pa Adding Files to the Dataset --------------------------- -In the managed case, once a Globus transfer has been initiated a final API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: +In the managed case, you must initiate a 
Globus transfer and take note of its task identifier. As in the JSON example below, you will pass it as ``taskIdentifier`` along with details about the files you are transferring: .. code-block:: bash From 2f571e23c7b1b98ce530d5a87ed20c8797810175 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 16:38:18 -0500 Subject: [PATCH 468/546] Got rid of some unnecessary database lookups that were made when rendering the harvesting server page. #3322 --- .../iq/dataverse/HarvestingSetsPage.java | 60 +++++++++++++++++-- src/main/java/propertyFiles/Bundle.properties | 2 +- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 6dbba34920b..0b66b652e0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -30,6 +30,8 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang3.StringUtils; /** @@ -430,44 +432,92 @@ public boolean isSessionUserAuthenticated() { return false; } + // The numbers of datasets and deleted/exported records below are used + // in rendering rules on the page. They absolutely need to be cached + // on the first lookup. + + Map cachedSetInfoNumDatasets = new HashMap<>(); + public int getSetInfoNumOfDatasets(OAISet oaiSet) { if (oaiSet.isDefaultSet()) { return getSetInfoNumOfExported(oaiSet); } + if (cachedSetInfoNumDatasets.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDatasets.get(oaiSet.getSpec()); + } + String query = oaiSet.getDefinition(); try { int num = oaiSetService.validateDefinitionQuery(query); if (num > -1) { + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), num); return num; } } catch (OaiSetException ose) { - // do notghin - will return zero. + // do nothing - will return zero. 
} + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), 0); return 0; } + Map cachedSetInfoNumExported = new HashMap<>(); + Integer defaultSetNumExported = null; + public int getSetInfoNumOfExported(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumExported != null) { + return defaultSetNumExported; + } else if (cachedSetInfoNumExported.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumExported.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findActiveOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumExported = num; + } else { + cachedSetInfoNumExported.put(oaiSet.getSpec(), num); + } + return num; } + Map cachedSetInfoNumDeleted = new HashMap<>(); + Integer defaultSetNumDeleted = null; + public int getSetInfoNumOfDeleted(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumDeleted != null) { + return defaultSetNumDeleted; + } else if (cachedSetInfoNumDeleted.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDeleted.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findDeletedOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumDeleted = num; + } else { + cachedSetInfoNumDeleted.put(oaiSet.getSpec(), num); + } + return num; } public void validateSetQuery() { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index ece3f070cdd..157f2ecaf54 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -631,7 +631,7 @@ harvestserver.tab.header.description=Description harvestserver.tab.header.definition=Definition Query harvestserver.tab.col.definition.default=All Published Local Datasets harvestserver.tab.header.stats=Datasets -harvestserver.tab.col.stats.empty=No records (empty set) +harvestserver.tab.col.stats.empty=No active records ({2} {2, choice, 0#records|1#record|2#records} marked as deleted) harvestserver.tab.col.stats.results={0} {0, choice, 0#datasets|1#dataset|2#datasets} ({1} {1, choice, 0#records|1#record|2#records} exported, {2} marked as deleted) harvestserver.tab.header.action=Actions harvestserver.tab.header.action.btn.export=Run Export From d86ab1587cb5088330c2df6565744769cc859119 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Fri, 12 Jan 2024 11:36:30 +0100 Subject: [PATCH 469/546] test: use curator role in testListRoleAssignments --- scripts/api/data/role-contributor-plus.json | 12 ---------- .../harvard/iq/dataverse/api/DatasetsIT.java | 22 ++++--------------- 2 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json deleted file mode 100644 index ef9ba3aaff6..00000000000 --- a/scripts/api/data/role-contributor-plus.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "alias":"contributorPlus", - "name":"ContributorPlus", - "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", - "permissions":[ - "ViewUnpublishedDataset", - "EditDataset", - "DownloadFile", - "DeleteDatasetDraft", - "ManageDatasetPermissions" - ] -} diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b51d400d2d4..787b9b018a9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1349,17 +1349,11 @@ public void testListRoleAssignments() { Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); - // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments - // of a dataset and assign it to the contributor user + // We assign the curator role to the contributor user + // (includes "ManageDatasetPermissions" which are required for listing role assignments of a dataset, but not + // "ManageDataversePermissions") - String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; - Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); - addDataverseRoleResponse.prettyPrint(); - String body = addDataverseRoleResponse.getBody().asString(); - String status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); - - Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "curator", "@" + contributorUsername, adminApiToken); giveRandoPermission.prettyPrint(); assertEquals(200, giveRandoPermission.getStatusCode()); @@ -1373,14 +1367,6 @@ public void testListRoleAssignments() { notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); - - // Finally, we clean up and delete the role we created - - Response deleteDataverseRoleResponse = UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); - deleteDataverseRoleResponse.prettyPrint(); - body = deleteDataverseRoleResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); } @Test From 5e9cc2ff4764915324ffc3c990f02e09738101c0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 12 Jan 2024 13:57:59 -0500 Subject: [PATCH 470/546] fix bad SQL query in guestbook #10232 --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index b0cc41eb448..01e6ecf7ff2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -928,7 +928,7 @@ public Long getDownloadCountByDatasetId(Long datasetId, LocalDate date) { if(date != null) { query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "' and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); }else { - query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + query 
= em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); } return (Long) query.getSingleResult(); } From d3f3eb9219fa101db8ebfea34ee62ccd3111194a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 12 Jan 2024 14:18:25 -0500 Subject: [PATCH 471/546] Update docker-compose-dev.yml better explain presence of settings #9275 --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index ce9f39a418a..10fe62ff6df 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -19,7 +19,7 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" - # to get HarvestingServerIT to pass + # These two oai settings are here to get HarvestingServerIT to pass dataverse_oai_server_maxidentifiers: "2" dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 From 74b45e1d7d24b621a7368c517e687df0b21f199c Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 16 Jan 2024 10:21:42 -0500 Subject: [PATCH 472/546] QA Guide general update --- doc/sphinx-guides/source/qa/index.md | 6 +-- doc/sphinx-guides/source/qa/overview.md | 22 ++++++---- .../source/qa/performance-tests.md | 8 ++++ .../{other-approaches.md => qa-workflow.md} | 41 ++++--------------- ...{manual-testing.md => testing-approach.md} | 9 +++- 5 files changed, 42 insertions(+), 44 deletions(-) rename doc/sphinx-guides/source/qa/{other-approaches.md => qa-workflow.md} (58%) rename doc/sphinx-guides/source/qa/{manual-testing.md => testing-approach.md} (84%) diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 6027f07574f..c7582a2169f 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -3,9 +3,9 @@ ```{toctree} overview.md testing-infrastructure.md -performance-tests.md -manual-testing.md +qa-workflow.md +testing-approach.md test-automation.md -other-approaches.md jenkins.md +performance-tests.md ``` diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index c4f66446ca3..08740e9345d 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -11,19 +11,27 @@ This guide describes the testing process used by QA at IQSS and provides a refer ## Workflow -The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. +The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as [issues](https://github.com/IQSS/dataverse/issues). These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. -## Release Cadence and Sprints +Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. -A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. +Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. -The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. +## Tips and Tricks -## Performance Testing and Deployment +- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. +- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. +- Always tail the server.log file while testing. 
Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. +- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. +- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. -The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. +## Release Cadence and Sprints -Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. +A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. + +The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. 
## Making a Release diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index ad7972bd75e..3fab0386eb0 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -7,8 +7,16 @@ ## Introduction +The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. + +Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. + +## Testing Environment + To run performance tests, we have a performance test cluster on AWS that employs web, database, and Solr. The database contains a copy of production that is updated weekly on Sundays. To ensure the homepage content is consistent between test runs across releases, two scripts set the datasets that will appear on the homepage. There is a script on the web server in the default CentOS user dir and one on the database server in the default CentOS user dir. Run these scripts before conducting the tests. +Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. + ## Access Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/qa-workflow.md similarity index 58% rename from doc/sphinx-guides/source/qa/other-approaches.md rename to doc/sphinx-guides/source/qa/qa-workflow.md index 2e2ef906191..78dcd1b6322 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -1,24 +1,10 @@ -# Other Approaches to Deploying and Testing +# QA workflow for Pull Requests ```{contents} Contents: :local: :depth: 3 ``` -This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? An alternative might be to spin an EC2 branch on AWS, potentially using sample data. 
This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. - -## Tips and Tricks - -- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. -- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. -- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. -- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. - -## Workflow for Completing QA on a PR - 1. Assign the PR you are working on to yourself. 1. What does it do? @@ -98,24 +84,13 @@ This workflow is fine for a single person testing a PR, one at a time. It would 1. Merge PR - Click merge to include this PR into the common develop branch. + Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. + + Some of the reasons why we encourage using option over Rebase or Squash are: + -Preserving commit hitory + -Clearer context and treaceability + -Easier collaboration, bug tracking and reverting 1. Delete merged branch - Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. - - -## Checklist for Completing QA on a PR - -1. Build the docs -1. Smoke test the pr -1. Test the new functionality -1. Regression test -1. Test any upgrade instructions - -## Checklist for QA on Release - -1. Review Consolidated Release Notes, in particular upgrade instructions. -1. Conduct performance testing and compare with the previous release. -1. Perform clean install and smoke test. -1. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking + Just a housekeeping move if the PR is from IQSS. 
Click the delete branch button where the merge button had been. There is no deletion for outside contributions. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/testing-approach.md similarity index 84% rename from doc/sphinx-guides/source/qa/manual-testing.md rename to doc/sphinx-guides/source/qa/testing-approach.md index 580e5153394..21039c10b1f 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -1,4 +1,4 @@ -# Manual Testing Approach +# Testing Approach ```{contents} Contents: :local: @@ -41,3 +41,10 @@ Think about risk. Is the feature or function part of a critical area such as per 1. Upload 3 different types of files: You can use a tabular file, 50by1000.dta, an image file, and a text file. 1. Publish the dataset. 1. Download a file. + + +## Alternative deployment and testing + +This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? + +An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. \ No newline at end of file From ff044632aff9c2b98aea01da934cfbf63476dc40 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 16 Jan 2024 11:32:17 -0500 Subject: [PATCH 473/546] add release note #9926 --- doc/release-notes/9926-list-role-assignments-permissions.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9926-list-role-assignments-permissions.md diff --git a/doc/release-notes/9926-list-role-assignments-permissions.md b/doc/release-notes/9926-list-role-assignments-permissions.md new file mode 100644 index 00000000000..43cd83dc5c9 --- /dev/null +++ b/doc/release-notes/9926-list-role-assignments-permissions.md @@ -0,0 +1 @@ +Listing collction/dataverse role assignments via API still requires ManageDataversePermissions, but listing dataset role assignments via API now requires only ManageDatasetPermissions. 
From 30e357bcfba66a2c7c2044beb4f03d88e532b96a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 16 Jan 2024 12:37:10 -0500 Subject: [PATCH 474/546] expect noSetHierarchy rather than noRecordsMatch #9275 --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 45dd0c08226..ac28e7a3605 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -888,7 +888,7 @@ public void testNoSuchSetError() { noSuchSet.prettyPrint(); noSuchSet.then().assertThat() .statusCode(OK.getStatusCode()) - .body("oai.error.@code", equalTo("noRecordsMatch")) + .body("oai.error.@code", equalTo("noSetHierarchy")) .body("oai.error", equalTo("Requested set 'census' does not exist")); } From dc08219cc6f7a2b1152c0acfe67b26844daa5abe Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 16 Jan 2024 12:46:32 -0500 Subject: [PATCH 475/546] Changes after talking to Phil at 12:00 on Jan 16 --- doc/sphinx-guides/source/qa/index.md | 1 - doc/sphinx-guides/source/qa/jenkins.md | 59 ------------------- doc/sphinx-guides/source/qa/overview.md | 8 ++- doc/sphinx-guides/source/qa/qa-workflow.md | 5 +- .../source/qa/test-automation.md | 58 ++++++++++++++++-- .../source/qa/testing-approach.md | 2 +- 6 files changed, 65 insertions(+), 68 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/jenkins.md diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index c7582a2169f..937b352bccb 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -6,6 +6,5 @@ testing-infrastructure.md qa-workflow.md testing-approach.md test-automation.md -jenkins.md performance-tests.md ``` diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md deleted file mode 100644 index 9259284beb9..00000000000 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ /dev/null @@ -1,59 +0,0 @@ -# Jenkins - -```{contents} Contents: -:local: -:depth: 3 -``` - -## Introduction - -Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) - -You can find our Jenkins installation at . - -Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) - -## Jobs - -Jenkins is organized into jobs. We'll highlight a few. - -### IQSS-dataverse-develop - -, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! - -You can get to this job from the README at . - -### IQSS-Dataverse-Develop-PR - - can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". 
Clicking it will bring you to a particular build like (build #10). - -### guides.dataverse.org - - is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. - -## Checking if API Tests are Passing - -If API tests are failing, you should not merge the pull request. - -How can you know if API tests are passing? Here are the steps, by way of example. - -- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . -- Click "Test Result". -- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. -- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. - -## Diagnosing Failures - -API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. - -If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. - -Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: - -``` -TASK [dataverse : download payara zip] ***************************************** -fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} -``` - -In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 08740e9345d..01ab629db8c 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -33,6 +33,12 @@ A release likely spans multiple two-week sprints. Each sprint represents the pri The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. +## Test API + +The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. + +This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. 
(See {doc}`/qa/test-automation` in the Developer Guide.) + ## Making a Release -See {doc}`/developers/making-releases` in the Developer Guide. +See {doc}`/developers/making-releases` in the Developer Guide. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 78dcd1b6322..df274d2405d 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -1,4 +1,4 @@ -# QA workflow for Pull Requests +# QA Workflow for Pull Requests ```{contents} Contents: :local: @@ -87,7 +87,8 @@ Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. Some of the reasons why we encourage using option over Rebase or Squash are: - -Preserving commit hitory + + -Preserving commit history -Clearer context and treaceability -Easier collaboration, bug tracking and reverting diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index c2b649df498..c996b4cea8f 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -1,15 +1,36 @@ # Test Automation - ```{contents} Contents: :local: :depth: 3 ``` -The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. +## Introduction + +Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) + +You can find our Jenkins installation at . + +Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) + +## Jobs + +Jenkins is organized into jobs. We'll highlight a few. + +### IQSS-dataverse-develop -This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. +, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! -## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal +You can get to this job from the README at . + +### IQSS-Dataverse-Develop-PR + + can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). + +### guides.dataverse.org + + is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. 
+ +### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal 1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. @@ -34,3 +55,32 @@ This type of approach is often used to give contributing developers confidence t 1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. 1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. + + + +## Checking if API Tests are Passing + +If API tests are failing, you should not merge the pull request. + +How can you know if API tests are passing? Here are the steps, by way of example. + +- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). +- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . +- Click "Test Result". +- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. +- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. + +## Diagnosing Failures + +API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. + +If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. + +Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: + +``` +TASK [dataverse : download payara zip] ***************************************** +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +``` + +In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! diff --git a/doc/sphinx-guides/source/qa/testing-approach.md b/doc/sphinx-guides/source/qa/testing-approach.md index 21039c10b1f..2c7241999a8 100644 --- a/doc/sphinx-guides/source/qa/testing-approach.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -43,7 +43,7 @@ Think about risk. Is the feature or function part of a critical area such as per 1. Download a file. -## Alternative deployment and testing +## Alternative Deployment and Testing This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. 
That might be OK, but is the environment fully configured enough to offer a real-world testing scenario?

From 95cc8cbffb79f8f91ba2e9137c2b3106e4c1f6b5 Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Tue, 16 Jan 2024 14:57:15 -0500
Subject: [PATCH 476/546] remove assertion about census not existing (doesn't appear) #9275

---
 .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index ac28e7a3605..60e4f623992 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -888,8 +888,7 @@ public void testNoSuchSetError() {
         noSuchSet.prettyPrint();
         noSuchSet.then().assertThat()
                 .statusCode(OK.getStatusCode())
-                .body("oai.error.@code", equalTo("noSetHierarchy"))
-                .body("oai.error", equalTo("Requested set 'census' does not exist"));
+                .body("oai.error.@code", equalTo("noSetHierarchy"));
     }
 

From edd6fc861f899b7ddb07c51fb5d900dbd0096a6c Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Tue, 16 Jan 2024 16:15:42 -0500
Subject: [PATCH 477/546] drop "no such set test" #9275

---
 .../edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index 60e4f623992..e77853d6495 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -882,15 +882,6 @@ public void testInvalidQueryParams() {
 
     }
 
-    @Test
-    public void testNoSuchSetError() {
-        Response noSuchSet = given().get("/oai?verb=ListIdentifiers&set=census&metadataPrefix=dc");
-        noSuchSet.prettyPrint();
-        noSuchSet.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("oai.error.@code", equalTo("noSetHierarchy"));
-    }
-
     // TODO:
     // What else can we test?
// Some ideas: From 2adbabb31e9206eb1518048a66f98e5853502707 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 17 Jan 2024 12:24:04 +0000 Subject: [PATCH 478/546] Added: typeClass field to DatasetFieldType payload --- doc/release-notes/10216-metadatablocks.md | 5 +++-- .../java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 1 + .../java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md index b3be7e76abc..59d9c1640a5 100644 --- a/doc/release-notes/10216-metadatablocks.md +++ b/doc/release-notes/10216-metadatablocks.md @@ -1,4 +1,5 @@ The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: -- `isRequired` - Whether or not this field is required -- `displayOrder`: The display order of the field in create/edit forms +- `isRequired`: Whether or not this field is required +- `displayOrder`: The display order of the field in create/edit forms +- `typeClass`: The type class of this field ("controlledVocabulary", "compound", or "primitive") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index a97ef9c12d1..2eaf6b64579 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -565,6 +565,7 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("displayName", fld.getDisplayName()); fieldsBld.add("title", fld.getTitle()); fieldsBld.add("type", fld.getFieldType().toString()); + fieldsBld.add("typeClass", typeClassString(fld)); fieldsBld.add("watermark", fld.getWatermark()); fieldsBld.add("description", fld.getDescription()); fieldsBld.add("multiple", fld.isAllowMultiples()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index f1c3a9815f1..39152bccad8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -27,6 +27,7 @@ void testGetCitationBlock() { .statusCode(OK.getStatusCode()) .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")) .body("data.fields.title.displayOrder", CoreMatchers.is(0)) + .body("data.fields.title.typeClass", CoreMatchers.is("primitive")) .body("data.fields.title.isRequired", CoreMatchers.is(true)); } From ebe95fdb2d81321e9de2d9e3fd3c41aacb474447 Mon Sep 17 00:00:00 2001 From: Katie Mika Date: Wed, 17 Jan 2024 11:35:33 -0500 Subject: [PATCH 479/546] Update native-api.rst Added clarification to what is affected in Set Citation Data Field Type for a Dataset --- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 09fc3c69693..dbe769e2fd1 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1572,8 +1572,8 @@ The fully expanded example above (without environment variables) looks like this Set Citation Date Field Type for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. 
-Note that the dataset citation date field type must be a date field. +Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. +Note that the dataset citation date field type must be a date field. This change applies to all versions of the dataset that have an entry for the new date field. It also applies to all file citations in the dataset. .. code-block:: bash From 598c40b8e5ccb2bb3db7a839e4549ac4d00ff8e1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 17 Jan 2024 16:10:03 -0500 Subject: [PATCH 480/546] replace project 2 with 34 #9157 --- CONTRIBUTING.md | 2 +- doc/sphinx-guides/source/admin/integrations.rst | 2 +- doc/sphinx-guides/source/developers/documentation.rst | 2 +- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b2be8f531c4..44f8ae65135 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ If you are interested in working on the main Dataverse code, great! Before you s Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request. -After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/2 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). +After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/34 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). Thanks for your contribution! diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index db566106b49..cae44d42dbf 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -245,7 +245,7 @@ Future Integrations The `Dataverse Project Roadmap `_ is a good place to see integrations that the core Dataverse Project team is working on. -The `Community Dev `_ column of our project board is a good way to track integrations that are being worked on by the Dataverse Community but many are not listed and if you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. +If you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. Many integrations take the form of "external tools". See the :doc:`external-tools` section for details. External tool makers should check out the :doc:`/api/external-tools` section of the API Guide. 
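For the external-tools pointers mentioned just above, a minimal sketch of how a tool is typically registered and listed through the admin API — the manifest filename and the localhost URL are illustrative placeholders, not part of this patch:

```bash
# Register an external tool from a manifest file (tool-manifest.json is a
# placeholder; see the external tools documentation for the manifest format).
curl -X POST -H 'Content-type: application/json' \
  http://localhost:8080/api/admin/externalTools --upload-file tool-manifest.json

# List the external tools that are currently registered.
curl http://localhost:8080/api/admin/externalTools
```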
diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index d07b5b63f72..4ec011f2b24 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -18,7 +18,7 @@ If you find a typo or a small error in the documentation you can fix it using Gi - Under the **Write** tab, delete the long welcome message and write a few words about what you fixed. - Click **Create Pull Request**. -That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/2 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors +That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/34 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 12f3d5b81fd..c36c7d1e963 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -142,7 +142,7 @@ Feedback on the pull request template we use is welcome! Here's an example of a Make Sure Your Pull Request Has Been Advanced to Code Review ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/2. +Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/34. Look at https://github.com/IQSS/dataverse/blob/master/CONTRIBUTING.md for various ways to reach out to developers who have enough access to the GitHub repo to move your issue and pull request to the "Code Review" column. From 2593310b4746fa7022d62c6955db3e69b4d03471 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 17 Jan 2024 16:13:50 -0500 Subject: [PATCH 481/546] use "Community Backlog" as "dev efforts" #9157 --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44f8ae65135..1430ba951a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,4 +64,4 @@ Thanks for your contribution! 
[Community Call]: https://dataverse.org/community-calls [dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev [community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing -[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405 +[dev efforts]: https://github.com/orgs/IQSS/projects/34/views/6 From 4f3a6ac3c038d920b7eb687a1eae6b7871e6eba8 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 18 Jan 2024 12:43:43 -0500 Subject: [PATCH 482/546] Add fix for SQL on guestbook service bean --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 01e6ecf7ff2..04f1ebf4bd0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -914,7 +914,7 @@ public void save(GuestbookResponse guestbookResponse) { public Long getDownloadCountByDataFileId(Long dataFileId) { // datafile id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); return (Long) query.getSingleResult(); } From eb6da705e1c2dcf4e657326a09646a47bec8cb88 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 18 Jan 2024 14:11:37 -0500 Subject: [PATCH 483/546] Add fix for same issue on another query reported by Jim Myers --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 04f1ebf4bd0..6c043b78941 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -432,7 +432,7 @@ public Long findCountByGuestbookId(Long guestbookId, Long dataverseId) { Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } else { - String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + "and o.guestbook_id = " + guestbookId; + String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + " and o.guestbook_id = " + guestbookId; Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } From 867b7dcc8244e0ea4396ef1ef0dcadec40ce6b2c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 18 Jan 2024 14:58:14 -0500 Subject: [PATCH 484/546] a better test setup (#3322) --- .../harvard/iq/dataverse/api/HarvestingServerIT.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 
e0f121305e0..ed9cbdaaed0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -299,8 +299,7 @@ public void testSetEditAPIandOAIlistSets() throws InterruptedException { // expected HTTP result codes. String setName = UtilIT.getRandomString(6); - String persistentId = extraDatasetsIdentifiers.get(0); - String setDef = "dsPersistentId:"+persistentId; + String setDefinition = "title:Sample"; // Make sure the set does not exist String setPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -313,20 +312,21 @@ public void testSetEditAPIandOAIlistSets() throws InterruptedException { // Create the set as admin user Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, setDef)) + .body(jsonForTestSpec(setName, setDefinition)) .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); // I. Test the Modify/Edit (POST method) functionality of the // Dataverse OAI Sets API - String newDefinition = "title:New"; + String persistentId = extraDatasetsIdentifiers.get(0); + String newDefinition = "dsPersistentId:"+persistentId; String newDescription = "updated"; // API Test 1. Try to modify the set as normal user, should fail Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, setDef, "")) + .body(jsonForEditSpec(setName, newDefinition, "")) .put(setPath); logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); assertEquals(400, editSetResponse.getStatusCode()); From 091629a6b9db2a3d1b879817a162b4309c040d15 Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 19 Jan 2024 12:28:41 +0100 Subject: [PATCH 485/546] Add configuration for automatic XHTML/CSS/etc. reloading in IDEA in docker When running Dataverse in Docker we still want to be able to just edit things under src/main/webapp and then just reload the web page to see the changes. To do this: 1. Mapped Payara /opt/payara/appserver/glassfish/domains/domain1/applications folder to ./docker-dev-volumes/glassfish/applications 2. Added watchers.xml File watcher configuration, which can be imported into IDEA to ... 3. ... run cpwebapp.sh to copy changed files under src/main/webapp to ./docker-dev-volumes/glassfish/applications/dataverse-{current version} --- docker-compose-dev.yml | 2 ++ scripts/intellij/cpwebapp.sh | 33 +++++++++++++++++++++++++++++++++ scripts/intellij/watchers.xml | 22 ++++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100755 scripts/intellij/cpwebapp.sh create mode 100644 scripts/intellij/watchers.xml diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 10fe62ff6df..76a4c8a745d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,6 +60,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets + # Map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). # Replace 6.0 with the current version. 
# - ./target/dataverse-6.0:/opt/payara/deployments/dataverse diff --git a/scripts/intellij/cpwebapp.sh b/scripts/intellij/cpwebapp.sh new file mode 100755 index 00000000000..6ecad367048 --- /dev/null +++ b/scripts/intellij/cpwebapp.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# cpwebapp +# +# Usage: +# +# Add a File watcher by importing watchers.xml into IntelliJ IDEA, and let it do the copying whenever you save a +# file under webapp. +# +# https://www.jetbrains.com/help/idea/settings-tools-file-watchers.html +# +# Alternatively, you can add an External tool and trigger via menu or shortcut to do the copying manually: +# +# https://www.jetbrains.com/help/idea/configuring-third-party-tools.html +# + +PROJECT_DIR=$1 +FILE_TO_COPY=$2 +RELATIVE_PATH="${FILE_TO_COPY#$PROJECT_DIR/}" + +# Check if RELATIVE_PATH starts with 'src/main/webapp', otherwise ignore +if [[ $RELATIVE_PATH == src/main/webapp* ]]; then + # Get current version. Any other way to do this? A simple VERSION file would help. + VERSION=`perl -ne 'print $1 if /(.*?)<\/revision>/' ./modules/dataverse-parent/pom.xml` + RELATIVE_PATH_WITHOUT_WEBAPP="${RELATIVE_PATH#src/main/webapp/}" + TARGET_DIR=./docker-dev-volumes/glassfish/applications/dataverse-$VERSION + TARGET_PATH="${TARGET_DIR}/${RELATIVE_PATH_WITHOUT_WEBAPP}" + + mkdir -p "$(dirname "$TARGET_PATH")" + cp "$FILE_TO_COPY" "$TARGET_PATH" + + echo "File $FILE_TO_COPY copied to $TARGET_PATH" +fi diff --git a/scripts/intellij/watchers.xml b/scripts/intellij/watchers.xml new file mode 100644 index 00000000000..e118fea558f --- /dev/null +++ b/scripts/intellij/watchers.xml @@ -0,0 +1,22 @@ + + + + + \ No newline at end of file From cb08667a77a2ea2a51093c81e6048ee9b5b1ef30 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Fri, 19 Jan 2024 15:10:17 -0500 Subject: [PATCH 486/546] #10249 correct typo in search API documentation --- doc/sphinx-guides/source/api/search.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index b941064f173..e8d0a0b3ea7 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -25,7 +25,7 @@ Parameters Name Type Description =============== ======= =========== q string The search term or terms. Using "title:data" will search only the "title" field. "*" can be used as a wildcard either alone or adjacent to a term (i.e. "bird*"). For example, https://demo.dataverse.org/api/search?q=title:data . For a list of fields to search, please see https://github.com/IQSS/dataverse/issues/2558 (for now). -type string Can be either "Dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset +type string Can be either "dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset subtree string The identifier of the Dataverse collection to which the search should be narrowed. The subtree of this Dataverse collection and all its children will be searched. Multiple "subtree" parameters can be used to include multiple Dataverse collections. For example, https://demo.dataverse.org/api/search?q=data&subtree=birds&subtree=cats . sort string The sort field. 
Supported values include "name" and "date". See example under "order". order string The order in which to sort. Can either be "asc" or "desc". For example, https://demo.dataverse.org/api/search?q=data&sort=name&order=asc From fc28b37a9bdc847f04f1988f922a1414b1c70527 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Mon, 22 Jan 2024 13:17:38 -0500 Subject: [PATCH 487/546] bump google.library.version to 26.30.0 per Jim --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index e2d1ceec539..386d4934cb1 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -152,7 +152,7 @@ 42.6.0 9.3.0 1.12.290 - 26.29.0 + 26.30.0 8.0.0 From a28e15a9316cb1f4d726ddd0afee6cd817324c3b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 10:22:55 -0500 Subject: [PATCH 488/546] #9686 display harvesting client info on cards of harvested objects --- .../iq/dataverse/DatasetServiceBean.java | 48 ------------------- .../iq/dataverse/DvObjectServiceBean.java | 48 +++++++++++++++++++ .../search/SearchIncludeFragment.java | 41 ++++++++++------ .../harvard/iq/dataverse/api/DatasetsIT.java | 2 + 4 files changed, 76 insertions(+), 63 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index c6df2a2e1ab..4c4aafdd1ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -583,54 +583,6 @@ public Long getDatasetVersionCardImage(Long versionId, User user) { return null; } - /** - * Used to identify and properly display Harvested objects on the dataverse page. - * - * @param datasetIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ - if (datasetIds == null || datasetIds.size() < 1) { - return null; - } - - String datasetIdStr = StringUtils.join(datasetIds, ", "); - - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dsId; - if (result[0] != null) { - try { - dsId = (Long)result[0]; - } catch (Exception ex) { - dsId = null; - } - if (dsId == null) { - continue; - } - - ret.put(dsId, (String)result[1]); - } - } - - return ret; - } - - public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { if (datasetVersion == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index d4219c36149..58a246b364a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -383,6 +383,54 @@ public Map getObjectPathsByIds(Set objectIds){ return ret; } + /** + * Used to identify and properly display Harvested objects on the dataverse page. 
+ * + * @param dvObjectIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDvObjects(Set dvObjectIds){ + + if (dvObjectIds == null || dvObjectIds.size() < 1) { + return null; + } + + String dvObjectIsString = StringUtils.join(dvObjectIds, ", "); + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, DvObject d WHERE d.harvestingClient_id = h.id AND d.id IN (" + dvObjectIsString + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dvObjId; + if (result[0] != null) { + try { + Integer castResult = (Integer) result[0]; + dvObjId = Long.valueOf(castResult); + } catch (Exception ex) { + dvObjId = null; + } + if (dvObjId == null) { + continue; + } + ret.put(dvObjId, (String)result[1]); + } + } + + return ret; + } + + public String generateNewIdentifierByStoredProcedure() { StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 5a5d8781726..939b39b94ef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1367,6 +1367,7 @@ public boolean canPublishDataset(Long datasetId){ public void setDisplayCardValues() { Set harvestedDatasetIds = null; + Set harvestedFileIds = null; for (SolrSearchResult result : searchResultsList) { //logger.info("checking DisplayImage for the search result " + i++); if (result.getType().equals("dataverses")) { @@ -1392,10 +1393,10 @@ public void setDisplayCardValues() { } else if (result.getType().equals("files")) { result.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsBase64Url(result)); if (result.isHarvested()) { - if (harvestedDatasetIds == null) { - harvestedDatasetIds = new HashSet<>(); + if (harvestedFileIds == null) { + harvestedFileIds = new HashSet<>(); } - harvestedDatasetIds.add(result.getParentIdAsLong()); + harvestedFileIds.add(result.getEntityId()); } } } @@ -1407,25 +1408,35 @@ public void setDisplayCardValues() { // SQL query: if (harvestedDatasetIds != null) { - Map descriptionsForHarvestedDatasets = datasetService.getArchiveDescriptionsForHarvestedDatasets(harvestedDatasetIds); - if (descriptionsForHarvestedDatasets != null && descriptionsForHarvestedDatasets.size() > 0) { + Map descriptionsForHarvestedDatasets = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedDatasetIds); + if (descriptionsForHarvestedDatasets != null && !descriptionsForHarvestedDatasets.isEmpty()) { for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested()) { - if (result.getType().equals("files")) { - if (descriptionsForHarvestedDatasets.containsKey(result.getParentIdAsLong())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getParentIdAsLong())); - } - } else if (result.getType().equals("datasets")) { - if (descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); - } - } + if (result.isHarvested() && result.getType().equals("datasets") && 
descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { + result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); } } } descriptionsForHarvestedDatasets = null; harvestedDatasetIds = null; } + + if (harvestedFileIds != null) { + + Map descriptionsForHarvestedFiles = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedFileIds); + if (descriptionsForHarvestedFiles != null && !descriptionsForHarvestedFiles.isEmpty()) { + for (SolrSearchResult result : searchResultsList) { + if (result.isHarvested() && result.getType().equals("files") && descriptionsForHarvestedFiles.containsKey(result.getEntityId())) { + + result.setHarvestingDescription(descriptionsForHarvestedFiles.get(result.getEntityId())); + + } + } + } + descriptionsForHarvestedFiles = null; + harvestedDatasetIds = null; + + } + // determine which of the objects are linked: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 9b51be4b365..087db4858b2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2548,6 +2548,8 @@ public void testLinkingDatasets() { EntityManager entityManager = entityManagerFactory.createEntityManager(); entityManager.getTransaction().begin(); // Do stuff... + //SEK 01/22/2024 - as of 6.2 harvestingclient_id will be on the dv object table + // so if this is ever implemented change will probably need to happen in the updatequery below entityManager.createNativeQuery("UPDATE dataset SET harvestingclient_id=1 WHERE id="+datasetId2).executeUpdate(); entityManager.getTransaction().commit(); entityManager.close(); From 88bae3bb295c26e7eda57d1ad5fbb34b67788542 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 10:59:46 -0500 Subject: [PATCH 489/546] #9686 fix script names --- ...emetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} | 0 ...gclient-id.sql => V6.1.0.2__9686-move-harvestingclient-id.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.3__9728-universe-variablemetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} (100%) rename src/main/resources/db/migration/{V6.1.0.1__9686-move-harvestingclient-id.sql => V6.1.0.2__9686-move-harvestingclient-id.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql rename to src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql diff --git a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql similarity index 100% rename from src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql From 7d27a9b64736780314ed3a203990d701db2ab399 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 11:17:50 -0500 Subject: [PATCH 490/546] #10255 fix script name --- ...emetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.3__9728-universe-variablemetadata.sql => 
V6.1.0.1__9728-universe-variablemetadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql rename to src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql From 89b7f277ccddfc849611d7e08c16fcd3b2af3dcc Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 23 Jan 2024 13:46:16 -0500 Subject: [PATCH 491/546] Fix the issue with the thumbnail size --- src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java | 2 +- src/main/webapp/resources/css/structure.css | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index ccf861ebdc8..03a0044a987 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -464,7 +464,7 @@ public static InputStream getLogoAsInputStream(Dataset dataset) { try { in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), - ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE).getInputStream(); + ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE).getInputStream(); } catch (IOException ioex) { logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 470c07d4534..b81cf2a2c47 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -483,7 +483,7 @@ span.search-term-match {font-weight: bold;} [id$='resultsTable'] div.card-title-icon-block span.label {vertical-align:15%} [id$='resultsTable'] div.card-preview-icon-block {width:48px; float:left; margin:4px 12px 6px 0;} [id$='resultsTable'] div.card-preview-icon-block a {display:block; height:48px; line-height:48px;} -[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle;} +[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle; max-width: 64px; max-height: 48px; padding-right: 10px;} [id$='resultsTable'] div.card-preview-icon-block span[class^='icon'], [id$='resultsTable'] div.card-preview-icon-block span[class^='glyphicon'] {font-size:2.8em;} From 59690d4c9a2b5686e3b38f07c634fb32323400ff Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 09:55:46 -0500 Subject: [PATCH 492/546] emphasize need to check flyway number before merging #10101 --- .../source/developers/sql-upgrade-scripts.rst | 2 ++ doc/sphinx-guides/source/qa/qa-workflow.md | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst index bace682b1b8..4689aeec0f2 100644 --- a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst +++ b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst @@ -21,6 +21,8 @@ If you are creating a new database table (which maps to an ``@Entity`` in JPA), If you are doing anything other than creating a new database table such as adding a column to an existing table, you must create or update a SQL upgrade script. +.. 
_create-sql-script: + How to Create a SQL Upgrade Script ---------------------------------- diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index df274d2405d..cb047a3086a 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -27,9 +27,11 @@ Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. -1. Does it use a DB, Flyway script? +1. Does it include a database migration script (Flyway)? - Good to know since it may collide with another existing one by version or it could be a one way transform of your DB so back up your test DB before. Also, happens during deployment so be on the lookout for any issues. + First, check the numbering in the filename of the script. It must be in line with the rules defined at {ref}`create-sql-script`. If the number is out of date (very common for older pull requests), do not merge and ask the developer to rename the script. Otherwise, deployment will fail. + + Once you're sure the numbering is ok (the next available number, basically), back up your database and proceeed with testing. 1. Validate the documentation. @@ -94,4 +96,4 @@ 1. Delete merged branch - Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. \ No newline at end of file + Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. From 5292682d6724e1b24cb4001768ce82d97d8dc771 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 24 Jan 2024 12:05:09 -0500 Subject: [PATCH 493/546] fix for #10251 - sync terms popup required code --- .../harvard/iq/dataverse/util/FileUtil.java | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 776d04e98cc..8decf74fe13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1199,34 +1199,12 @@ public static boolean isGuestbookPopupRequired(DatasetVersion datasetVersion) { } public static boolean isTermsPopupRequired(DatasetVersion datasetVersion) { - - if (datasetVersion == null) { - logger.fine("TermsPopup not required because datasetVersion is null."); - return false; - } - //0. if version is draft then Popup "not required" - if (!datasetVersion.isReleased()) { - logger.fine("TermsPopup not required because datasetVersion has not been released."); + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if(answer == null) { + logger.fine("TermsPopup is not required."); return false; } - // 1. License and Terms of Use: - if (datasetVersion.getTermsOfUseAndAccess() != null) { - if (!License.CC0.equals(datasetVersion.getTermsOfUseAndAccess().getLicense()) - && !(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse() == null - || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { - logger.fine("TermsPopup required because of license or terms of use."); - return true; - } - - // 2. 
Terms of Access: - if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { - logger.fine("TermsPopup required because of terms of access."); - return true; - } - } - - logger.fine("TermsPopup is not required."); - return false; + return answer; } /** From 51984163525453b7360dd0b89db8746b8d55c031 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 24 Jan 2024 13:04:33 -0500 Subject: [PATCH 494/546] fix null issue found in #10251 --- .../java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index ca3f5b4bded..de3f4d2ab56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -316,7 +316,7 @@ private void redirectToDownloadAPI(String downloadType, Long fileId, boolean gue Long fileMetadataId) { String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - if (downloadType.equals("GlobusTransfer")) { + if ("GlobusTransfer".equals(downloadType)) { PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); } else { logger.fine("Redirecting to file download url: " + fileDownloadUrl); From 96f2c95a26f6bf9d153a0b95f6cea7bdac7bd4ea Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 14:40:12 -0500 Subject: [PATCH 495/546] minor tweaks #10101 --- .../source/developers/making-releases.rst | 2 ++ doc/sphinx-guides/source/qa/overview.md | 12 +++---- .../source/qa/performance-tests.md | 6 ++-- doc/sphinx-guides/source/qa/qa-workflow.md | 14 ++++---- .../source/qa/test-automation.md | 35 ++++++++++--------- .../source/qa/testing-approach.md | 14 ++++---- .../source/qa/testing-infrastructure.md | 4 +-- 7 files changed, 45 insertions(+), 42 deletions(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 6b94282d55e..18ae34ee656 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -83,6 +83,8 @@ To test these images against our API test suite, go to the "alpha" workflow at h If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. +.. _build-guides: + Build the Guides for the Release -------------------------------- diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 01ab629db8c..f8eb7b19297 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -15,17 +15,17 @@ The basic workflow is as follows. Bugs or feature requests are submitted to GitH Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. -Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. +Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. ## Tips and Tricks - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. - When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you interact with the application and makes it easier to identify any stack trace on failure. +- When overloaded, QA the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo server before testing the fix. That way you know you are taking the correct steps to verify that the fix worked. - When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. 
-- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup.
+- Back up your DB before applying an irreversible DB update when you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup.
 
 ## Release Cadence and Sprints
 
@@ -41,4 +41,4 @@ This type of approach is often used to give contributing developers confidence t
 
 ## Making a Release
 
-See {doc}`/developers/making-releases` in the Developer Guide. \ No newline at end of file
+See {doc}`/developers/making-releases` in the Developer Guide.
diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md
index 3fab0386eb0..404188735a2 100644
--- a/doc/sphinx-guides/source/qa/performance-tests.md
+++ b/doc/sphinx-guides/source/qa/performance-tests.md
@@ -7,7 +7,7 @@
 
 ## Introduction
 
-The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page.
+The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming, it is done once near the end. Using a load-generating tool named {ref}`Locust `, our scripts load the statistically most-loaded pages (according to Google Analytics): 50% homepage and 50% some type of dataset page.
 
 Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page.
 
@@ -19,11 +19,11 @@ Once the performance has been tested and recorded in a [Google spreadsheet](http
 
 ## Access
 
-Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid.
+Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir.
 
 ## Special Notes ⚠️
 
-Please note the performance database is also used occasionally by Julian and the Curation team to generate prod reports so a courtesy check with Julian would be good before taking over the env.
+Please note the performance database is also used occasionally by members of the Curation team to generate prod reports so a courtesy check with them would be good before taking over the env.
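For orientation, the traffic mix described above (a 50/50 split between the homepage and dataset pages, anonymous access, and 5-20 seconds of think time between requests) can be pictured with a small sketch. The real tests are driven by Locust and ramp from 1 to 250 simulated users; the snippet below is only a single-user Java illustration of the request pattern, with a placeholder host and placeholder dataset PIDs, not the actual load script.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.List;
import java.util.Random;

/**
 * Single-user illustration of the load pattern described above (not the real Locust script):
 * pick the homepage or a dataset page at random, wait 5-20 seconds, repeat.
 */
public class LoadPatternSketch {
    private static final String HOST = "https://perf.example.edu"; // placeholder for the performance cluster
    private static final List<String> DATASET_PIDS = List.of(
            "doi:10.5072/FK2/EXAMPLE1", "doi:10.5072/FK2/EXAMPLE2"); // placeholders

    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        Random random = new Random();
        for (int i = 0; i < 20; i++) {
            // 50% homepage, 50% some dataset page, as an anonymous (guest) user
            String path = random.nextBoolean()
                    ? "/"
                    : "/dataset.xhtml?persistentId=" + DATASET_PIDS.get(random.nextInt(DATASET_PIDS.size()));
            HttpRequest request = HttpRequest.newBuilder(URI.create(HOST + path)).GET().build();
            HttpResponse<Void> response = client.send(request, HttpResponse.BodyHandlers.discarding());
            System.out.println(path + " -> " + response.statusCode());
            Thread.sleep((5 + random.nextInt(16)) * 1000L); // think time: 5-20 seconds
        }
    }
}
```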
+Please note the performance database is also used occasionally by members of the Curation team to generate prod reports so a courtesy check with them would be good before taking over the env. Executing the Performance Script diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index cb047a3086a..3db17ecb8a4 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -23,9 +23,9 @@ Small changes or fixes usually don’t have docs but new features or extensions of a feature or new configuration options should have documentation. -1. Does it have or need release notes? +1. Does it have or need a release note snippet? - Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. + Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. See also {ref}`writing-release-note-snippets` for what to expect in a release note snippet. 1. Does it include a database migration script (Flyway)? @@ -35,7 +35,7 @@ 1. Validate the documentation. - Build the doc using Jenkins, does it build without errors? + Build the doc using Jenkins or read the automated Read the Docs preview. Does it build without errors? Read it through for sense. Use it for test cases and to understand the feature. @@ -88,11 +88,11 @@ Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. - Some of the reasons why we encourage using option over Rebase or Squash are: + Some of the reasons why we encourage using this option over Rebase or Squash are: - -Preserving commit history - -Clearer context and treaceability - -Easier collaboration, bug tracking and reverting + - Preservation of commit history + - Clearer context and treaceability + - Easier collaboration, bug tracking and reverting 1. Delete merged branch diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index c996b4cea8f..e4b3b12ec43 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -4,7 +4,7 @@ :depth: 3 ``` -## Introduction +## Jenkins Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) @@ -12,28 +12,27 @@ You can find our Jenkins installation at . Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) -## Jobs +### Jenkins Jobs Jenkins is organized into jobs. We'll highlight a few. -### IQSS-dataverse-develop +#### IQSS-dataverse-develop -, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! +, which we will refer to as the "develop" job, runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! -You can get to this job from the README at . +You can access this job from the README at . 
-### IQSS-Dataverse-Develop-PR +#### IQSS-Dataverse-Develop-PR can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -### guides.dataverse.org +#### guides.dataverse.org - is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. + is what we use to build guides. See {ref}`build-guides` in the Developer Guide for how this job is used at release time. -### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal +#### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal - -1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. +1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. 1. From the pull request page, click the copy icon next to the pull request branch name. @@ -50,15 +49,13 @@ You can get to this job from the README at . 1. Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. -1. If for some reason it didn’t deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara5/bin/asadmin list-applications; /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1; /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war`) +1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) -1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. 1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. - - -## Checking if API Tests are Passing +### Checking if API Tests are Passing on Jenkins If API tests are failing, you should not merge the pull request. @@ -70,7 +67,7 @@ How can you know if API tests are passing? Here are the steps, by way of example - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. -## Diagnosing Failures +### Diagnosing Failures on Jenkins API test failures can have multiple causes. 
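As a quick supplement to eyeballing the homepage in the steps above, the running build can also be confirmed from the native info API. The sketch below is only an illustration and assumes that API is reachable on the internal test server; the hostname shown is a placeholder.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Minimal sketch: ask a Dataverse installation which version/build it is running,
 * via the native info API. Replace the placeholder host with the internal test server.
 */
public class CheckDeployedBuild {
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : "https://dataverse-internal.example.edu"; // placeholder
        HttpRequest request = HttpRequest.newBuilder(URI.create(host + "/api/info/version")).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // Expect something like: {"status":"OK","data":{"version":"6.1","build":"..."}}
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```

If the reported build does not match what Jenkins just deployed, it is usually the same caching problem the undeploy/redeploy step above works around.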
As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. @@ -84,3 +81,7 @@ fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "el ``` In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! + +## GitHub Actions + +We also use GitHub Actions. See for a list of actions. diff --git a/doc/sphinx-guides/source/qa/testing-approach.md b/doc/sphinx-guides/source/qa/testing-approach.md index 2c7241999a8..817161d02a0 100644 --- a/doc/sphinx-guides/source/qa/testing-approach.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -8,25 +8,25 @@ We use a risk-based, manual testing approach to achieve the most benefit with limited resources. This means we want to catch bugs where they are likely to exist, ensure core functions work, and failures do not have catastrophic results. In practice this means we do a brief positive check of core functions on each build called a smoke test, we test the most likely place for new bugs to exist, the area where things have changed, and attempt to prevent catastrophic failure by asking about the scope and reach of the code and how failures may occur. -If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. +If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by a Design group (in the past) and by the community. ## Examining a Pull Request for Test Cases ### What Problem Does It Solve? -Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. +Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. ### How is It Configured? -Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! +Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so make sure they are in the release note snippet and try them out. Plus, that is the only way you will get it working to test it! 
-Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. +Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing, or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields have been added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. -Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature but a good basic approach to keep in mind. +Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature, but this is a good basic approach to keep in mind. Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log. Err on the side of providing the developer too much of server.log rather than too little. -Exercise the UI if there is one. We tend to use Chrome for most of my basic testing as it's used twice as much as the next most commonly used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. +Exercise the UI if there is one. We tend to use Chrome for most of our basic testing as it's used twice as much as the next most commonly-used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. Check permissions. Is this feature limited to a specific set of users? Can it be accessed by a guest or by a non-privileged user? How about pasting a privileged page URL into a non-privileged user’s browser? @@ -47,4 +47,4 @@ Think about risk. Is the feature or function part of a critical area such as per This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? -An alternative might be to spin an EC2 branch on AWS, potentially using sample data. 
This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. \ No newline at end of file +An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index 7a4bda626fc..c099076c458 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -7,11 +7,11 @@ ## Dataverse Internal -To build and test a PR, we use a build named `IQSS_Dataverse_Internal` on , which deploys the .war file to an AWS instance named . +To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on (see {doc}`test-automation`), which deploys the .war file to an AWS instance named . ## Guides Server -There is also a guides build project named `guides.dataverse.org`. Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. +There is also a guides job called `guides.dataverse.org` (see {doc}`test-automation`). Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. Note that changes to guides can also be previewed on Read the Docs. In the pull request, look for a link like . This Read the Docs preview is also mentioned under also {doc}`/developers/documentation`. From d06ded15c9da2024f75250bcc8a25c363ae1cdc9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 14:51:57 -0500 Subject: [PATCH 496/546] move "deploy to internal" out of "test automation" #10101 --- doc/sphinx-guides/source/qa/qa-workflow.md | 2 +- .../source/qa/test-automation.md | 25 ------------------ .../source/qa/testing-infrastructure.md | 26 +++++++++++++++++++ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 3db17ecb8a4..4654a7456d2 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -41,7 +41,7 @@ 1. Build and deploy the pull request. - Normally this is done using Jenkins and automatically deployed to the QA test machine. + Normally this is done using Jenkins and automatically deployed to the QA test machine. See {ref}`deploy-to-internal`. 1. Configure if required diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index e4b3b12ec43..708d0f88e23 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -30,31 +30,6 @@ You can access this job from the README at . 
is what we use to build guides. See {ref}`build-guides` in the Developer Guide for how this job is used at release time. -#### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal - -1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. - -1. From the pull request page, click the copy icon next to the pull request branch name. - -1. Log on to , select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: - - * 8372-gdcc-xoai-library has IQSS implied - - **Repository URL:** https://github.com/IQSS/dataverse.git - - **Branch specifier:** */8372-gdcc-xoai-library - * GlobalDataverseCommunityConsortium:GDCC/DC-3B - - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - - **Branch specifier:** */GDCC/DC-3B. - -1. Click "Build Now" and note the build number in progress. - -1. Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. - -1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) - -1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. - -1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. - ### Checking if API Tests are Passing on Jenkins If API tests are failing, you should not merge the pull request. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index c099076c458..804e4c0afe6 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -9,6 +9,32 @@ To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on (see {doc}`test-automation`), which deploys the .war file to an AWS instance named . +(deploy-to-internal)= +### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal + +1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. + +1. From the pull request page, click the copy icon next to the pull request branch name. + +1. Log on to , select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: + + * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. + +1. Click "Build Now" and note the build number in progress. + +1. 
Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. + +1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) + +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. + +1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. + ## Guides Server There is also a guides job called `guides.dataverse.org` (see {doc}`test-automation`). Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. From 5ffc0589c75fe2fcf2584050ae5a477ddce27e06 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:06:42 -0500 Subject: [PATCH 497/546] move testing approaches just below overview #10101 --- doc/sphinx-guides/source/qa/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 937b352bccb..f16cd1d38fc 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -2,9 +2,9 @@ ```{toctree} overview.md +testing-approach.md testing-infrastructure.md qa-workflow.md -testing-approach.md test-automation.md performance-tests.md ``` From 61abe519a429be60616cd61a56df4ad4f4aa52dd Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:12:01 -0500 Subject: [PATCH 498/546] minor edits #10101 --- doc/sphinx-guides/source/qa/overview.md | 2 ++ doc/sphinx-guides/source/qa/qa-workflow.md | 1 + 2 files changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index f8eb7b19297..64796357831 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -17,6 +17,8 @@ Before a pull request is moved to QA, it must be reviewed by a member of the dev Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. 
+The complete suggested workflow can be found at {doc}`qa-workflow`. + ## Tips and Tricks - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 4654a7456d2..9915fe97d98 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -4,6 +4,7 @@ :local: :depth: 3 ``` +## Checklist 1. Assign the PR you are working on to yourself. From cad9e583732a568ff083999aba16941505a207f4 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:20:17 -0500 Subject: [PATCH 499/546] add release note #10101 --- doc/release-notes/10101-qa-guide.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10101-qa-guide.md diff --git a/doc/release-notes/10101-qa-guide.md b/doc/release-notes/10101-qa-guide.md new file mode 100644 index 00000000000..11fbd7df2c4 --- /dev/null +++ b/doc/release-notes/10101-qa-guide.md @@ -0,0 +1 @@ +A new QA Guide is intended mostly for the core development team but may be of interest to contributors. From 743dbbc6655fd9e8bcab9db7b9df71a2fa4758db Mon Sep 17 00:00:00 2001 From: beep Date: Thu, 25 Jan 2024 08:37:24 +0100 Subject: [PATCH 500/546] Update docker-compose-dev.yml Co-authored-by: Philip Durbin --- docker-compose-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 76a4c8a745d..6eab84092ed 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,8 +60,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets - # Map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh - - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications + # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). # Replace 6.0 with the current version. 
# - ./target/dataverse-6.0:/opt/payara/deployments/dataverse From 9d124e760bba83b7baa46bb1f88ec453a6bf6e6a Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 25 Jan 2024 11:51:12 +0000 Subject: [PATCH 501/546] Refactor: GetLatestPublishedDatasetVersionCommand --- ...tLatestPublishedDatasetVersionCommand.java | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index dd9a8112afe..9ba02ef750b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -17,7 +17,7 @@ public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; - private boolean checkPermsWhenDeaccessioned; + private final boolean checkPermsWhenDeaccessioned; public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { this(aRequest, anAffectedDataset, false, false); @@ -31,37 +31,35 @@ public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Datase } /* - * This command depending on the requested parameters will return: - * - * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. - * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. - * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. - * - */ + * This command depending on the requested parameters will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. + * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. + * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - - DatasetVersion dsv = null; - - //We search of a released or deaccessioned version if it is requested. - for (DatasetVersion next : ds.getVersions()) { - if (next.isReleased() || (includeDeaccessioned && next.isDeaccessioned())){ - dsv = next; - break; - } + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedVersion(); + if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { + return dsVersionResult; } + return null; + } - //Checking permissions if the deaccessionedVersion was found and we are checking permissions because files were requested. 
- if(dsv != null && (dsv.isDeaccessioned() && checkPermsWhenDeaccessioned)){ - //If the user has no permissions we return null - if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ - dsv = null; + private DatasetVersion getReleaseOrDeaccessionedVersion() { + for (DatasetVersion dsVersion : ds.getVersions()) { + if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { + return dsVersion; } } - - return dsv; + return null; } - - + private boolean userHasPermissionsOnDatasetVersion(DatasetVersion dsVersionResult, boolean checkPermsWhenDeaccessioned, CommandContext ctxt, Dataset ds) { + if (dsVersionResult.isDeaccessioned() && checkPermsWhenDeaccessioned) { + return ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset); + } + return true; + } } From e59907bf76553701c8d7ff16428a9cea9f132d96 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 25 Jan 2024 11:55:13 +0000 Subject: [PATCH 502/546] Refactor: method name --- .../command/impl/GetLatestPublishedDatasetVersionCommand.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index 9ba02ef750b..0afcbe2d0bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -40,14 +40,14 @@ public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Datase */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - DatasetVersion dsVersionResult = getReleaseOrDeaccessionedVersion(); + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedDatasetVersion(); if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { return dsVersionResult; } return null; } - private DatasetVersion getReleaseOrDeaccessionedVersion() { + private DatasetVersion getReleaseOrDeaccessionedDatasetVersion() { for (DatasetVersion dsVersion : ds.getVersions()) { if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { return dsVersion; From 252672ab68a52cd9b9d8e84b80ddb3f23df769b3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 24 Jan 2024 14:44:52 -0500 Subject: [PATCH 503/546] Proposed fix in #10220 comments --- .../iq/dataverse/ThumbnailServiceWrapper.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index ae81a9326c4..7f56ce0cb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,11 +5,14 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; - +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.SystemConfig; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ 
-170,17 +173,30 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean if (thumbnailFile == null) { - // We attempt to auto-select via the optimized, native query-based method + boolean hasDatasetLogo = false; + StorageIO storageIO = null; + try { + storageIO = DataAccess.getStorageIO(dataset); + if (!storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + // If not, return null/use the default, otherwise pass the logo URL + hasDatasetLogo = true; + } + } catch (IOException ioex) { + logger.warning("getDatasetCardImageAsUrl(): Failed to initialize dataset StorageIO for " + + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); + } + // If no other logo we attempt to auto-select via the optimized, native + // query-based method // from the DatasetVersionService: - if (datasetVersionService.getThumbnailByVersionId(versionId) == null) { + if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) { return null; } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; + } // it's the responsibility of the user - to make sure the search result From 2c989923fba155ef0fe56f46489c3eec77abb213 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 24 Jan 2024 17:10:43 -0500 Subject: [PATCH 504/546] reverse logic --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 7f56ce0cb27..b6ab23848e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -177,7 +177,7 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean StorageIO storageIO = null; try { storageIO = DataAccess.getStorageIO(dataset); - if (!storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + if (storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { // If not, return null/use the default, otherwise pass the logo URL hasDatasetLogo = true; } From 77ba2932551c4a1015745ef2f911fbb5ff7c730d Mon Sep 17 00:00:00 2001 From: landreev Date: Thu, 25 Jan 2024 11:23:19 -0500 Subject: [PATCH 505/546] Revert "9686 move harvesting client" --- .../9686-move-harvesting-client-id.md | 1 - .../edu/harvard/iq/dataverse/Dataset.java | 14 ++++- .../iq/dataverse/DatasetServiceBean.java | 48 +++++++++++++++++ .../edu/harvard/iq/dataverse/DvObject.java | 17 ------ .../iq/dataverse/DvObjectServiceBean.java | 48 ----------------- .../api/imports/ImportServiceBean.java | 5 -- .../client/HarvestingClientServiceBean.java | 4 +- .../dataverse/metrics/MetricsServiceBean.java | 52 +++++++++---------- .../search/SearchIncludeFragment.java | 41 ++++++--------- ...6.1.0.2__9686-move-harvestingclient-id.sql | 14 ----- .../harvard/iq/dataverse/api/DatasetsIT.java | 2 - .../harvard/iq/dataverse/api/MetricsIT.java | 17 +++--- 12 files changed, 112 insertions(+), 151 deletions(-) delete mode 100644 doc/release-notes/9686-move-harvesting-client-id.md delete mode 100644 src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql diff --git a/doc/release-notes/9686-move-harvesting-client-id.md b/doc/release-notes/9686-move-harvesting-client-id.md deleted 
file mode 100644 index 110fcc6ca6e..00000000000 --- a/doc/release-notes/9686-move-harvesting-client-id.md +++ /dev/null @@ -1 +0,0 @@ -With this release the harvesting client id will be available for harvested files. A database update will copy the id to previously harvested files./ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index e2788e6acc6..a2f560bc959 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -752,9 +752,21 @@ public void setDatasetExternalCitations(List datasetEx this.datasetExternalCitations = datasetExternalCitations; } + @ManyToOne + @JoinColumn(name="harvestingClient_id") + private HarvestingClient harvestedFrom; - + public HarvestingClient getHarvestedFrom() { + return this.harvestedFrom; + } + public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { + this.harvestedFrom = harvestingClientConfig; + } + + public boolean isHarvested() { + return this.harvestedFrom != null; + } private String harvestIdentifier; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 4c4aafdd1ec..c6df2a2e1ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -583,6 +583,54 @@ public Long getDatasetVersionCardImage(Long versionId, User user) { return null; } + /** + * Used to identify and properly display Harvested objects on the dataverse page. + * + * @param datasetIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ + if (datasetIds == null || datasetIds.size() < 1) { + return null; + } + + String datasetIdStr = StringUtils.join(datasetIds, ", "); + + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dsId; + if (result[0] != null) { + try { + dsId = (Long)result[0]; + } catch (Exception ex) { + dsId = null; + } + if (dsId == null) { + continue; + } + + ret.put(dsId, (String)result[1]); + } + } + + return ret; + } + + public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { if (datasetVersion == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 46955f52878..cc5d7620969 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; @@ -372,22 +371,6 @@ public GlobalId getGlobalId() { return globalId; } - @ManyToOne - @JoinColumn(name="harvestingClient_id") - private HarvestingClient harvestedFrom; - - public HarvestingClient getHarvestedFrom() { - return this.harvestedFrom; - } - - public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { - 
this.harvestedFrom = harvestingClientConfig; - } - - public boolean isHarvested() { - return this.harvestedFrom != null; - } - public abstract T accept(Visitor v); @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 58a246b364a..d4219c36149 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -383,54 +383,6 @@ public Map getObjectPathsByIds(Set objectIds){ return ret; } - /** - * Used to identify and properly display Harvested objects on the dataverse page. - * - * @param dvObjectIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDvObjects(Set dvObjectIds){ - - if (dvObjectIds == null || dvObjectIds.size() < 1) { - return null; - } - - String dvObjectIsString = StringUtils.join(dvObjectIds, ", "); - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, DvObject d WHERE d.harvestingClient_id = h.id AND d.id IN (" + dvObjectIsString + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dvObjId; - if (result[0] != null) { - try { - Integer castResult = (Integer) result[0]; - dvObjId = Long.valueOf(castResult); - } catch (Exception ex) { - dvObjId = null; - } - if (dvObjId == null) { - continue; - } - ret.put(dvObjId, (String)result[1]); - } - } - - return ret; - } - - public String generateNewIdentifierByStoredProcedure() { StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index c5812403f31..c17ba909230 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -332,11 +332,6 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString()); - //adding the harvesting client id to harvested files #9686 - for (DataFile df : ds.getFiles()){ - df.setHarvestedFrom(harvestingClient); - } - if (existingDs != null) { // If this dataset already exists IN ANOTHER DATAVERSE // we are just going to skip it! 
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 5747c64d217..7ec6d75a41c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -199,8 +199,8 @@ public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCoun public Long getNumberOfHarvestedDatasetsByAllClients() { try { - return (Long) em.createNativeQuery("SELECT count(d.id) FROM dvobject d " - + " WHERE d.harvestingclient_id IS NOT NULL and d.dtype = 'Dataset'").getSingleResult(); + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " + + " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult(); } catch (Exception ex) { logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 9ae0c7cbb8f..1b5619c53e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? 
"" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -198,6 +198,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); + return (long) query.getSingleResult(); } @@ -206,17 +207,16 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // A published local datasets may have more than one released version! 
// So that's why we have to jump through some extra hoops below // in order to select the latest one: - String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" - + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" - + " from datasetversion\n" - + " join dataset on dataset.id = datasetversion.dataset_id\n" - + " join dvobject on dataset.id = dvobject.id\n" - + " where versionstate='RELEASED'\n" - + " and dvobject.harvestingclient_id is null" - + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + " group by dataset_id\n" - + "))\n"; + String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" + + "(\n" + + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + + " from datasetversion\n" + + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " where versionstate='RELEASED'\n" + + " and dataset.harvestingclient_id is null\n" + + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + " group by dataset_id\n" + + "))\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated @@ -225,7 +225,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dvobject.harvestingclient_id IS NOT null \n" + + " AND dataset.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +244,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + "JOIN dvobject ON dvobject.id = dataset.id\n" + + ((d == null) ? 
"" : "JOIN dvobject ON dvobject.id = dataset.id\n") + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +258,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +276,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +304,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +327,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +350,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 939b39b94ef..5a5d8781726 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1367,7 +1367,6 @@ public boolean canPublishDataset(Long datasetId){ public void setDisplayCardValues() { Set harvestedDatasetIds = null; - Set harvestedFileIds = null; for (SolrSearchResult result : searchResultsList) { //logger.info("checking DisplayImage for the search result " + i++); if (result.getType().equals("dataverses")) { @@ -1393,10 +1392,10 @@ public void setDisplayCardValues() { } else if (result.getType().equals("files")) { result.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsBase64Url(result)); if (result.isHarvested()) { - if (harvestedFileIds == null) { - harvestedFileIds = new HashSet<>(); + if (harvestedDatasetIds == null) { + harvestedDatasetIds = new HashSet<>(); } - harvestedFileIds.add(result.getEntityId()); + harvestedDatasetIds.add(result.getParentIdAsLong()); } } } @@ -1408,35 +1407,25 @@ public void setDisplayCardValues() { // SQL query: if (harvestedDatasetIds != null) { - Map descriptionsForHarvestedDatasets = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedDatasetIds); - if (descriptionsForHarvestedDatasets != null && !descriptionsForHarvestedDatasets.isEmpty()) { + Map descriptionsForHarvestedDatasets = datasetService.getArchiveDescriptionsForHarvestedDatasets(harvestedDatasetIds); + if (descriptionsForHarvestedDatasets != null && descriptionsForHarvestedDatasets.size() > 0) { for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested() && result.getType().equals("datasets") && descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); + if (result.isHarvested()) { + if (result.getType().equals("files")) { + if (descriptionsForHarvestedDatasets.containsKey(result.getParentIdAsLong())) { + result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getParentIdAsLong())); + } + } else if (result.getType().equals("datasets")) { + if (descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { + 
result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); + } + } } } } descriptionsForHarvestedDatasets = null; harvestedDatasetIds = null; } - - if (harvestedFileIds != null) { - - Map descriptionsForHarvestedFiles = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedFileIds); - if (descriptionsForHarvestedFiles != null && !descriptionsForHarvestedFiles.isEmpty()) { - for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested() && result.getType().equals("files") && descriptionsForHarvestedFiles.containsKey(result.getEntityId())) { - - result.setHarvestingDescription(descriptionsForHarvestedFiles.get(result.getEntityId())); - - } - } - } - descriptionsForHarvestedFiles = null; - harvestedDatasetIds = null; - - } - // determine which of the objects are linked: diff --git a/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql deleted file mode 100644 index 67ba026745f..00000000000 --- a/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql +++ /dev/null @@ -1,14 +0,0 @@ -ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; - ---add harvesting client id to dvobject records of harvested datasets -update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s -where s.id = dvo.id; - ---add harvesting client id to dvobject records of harvested files -update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s -where s.id = dvo.owner_id; - -ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; - diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 087db4858b2..9b51be4b365 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2548,8 +2548,6 @@ public void testLinkingDatasets() { EntityManager entityManager = entityManagerFactory.createEntityManager(); entityManager.getTransaction().begin(); // Do stuff... 
- //SEK 01/22/2024 - as of 6.2 harvestingclient_id will be on the dv object table - // so if this is ever implemented change will probably need to happen in the updatequery below entityManager.createNativeQuery("UPDATE dataset SET harvestingclient_id=1 WHERE id="+datasetId2).executeUpdate(); entityManager.getTransaction().commit(); entityManager.close(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index 1425b7bc5d9..e3328eefb4a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -5,8 +5,6 @@ import edu.harvard.iq.dataverse.metrics.MetricsUtil; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.OK; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -18,13 +16,10 @@ //To improve these tests we should try adding data and see if the number DOESN'T //go up to show that the caching worked public class MetricsIT { - - private static String yyyymm; @BeforeAll public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); - yyyymm = LocalDate.now().format(DateTimeFormatter.ofPattern(MetricsUtil.YEAR_AND_MONTH_PATTERN)); UtilIT.clearMetricCache(); } @@ -35,7 +30,8 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -58,7 +54,8 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -80,7 +77,8 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -102,7 +100,8 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() From 994cf18e5c91245404830ef7e03d682c68a43538 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 25 Jan 2024 16:34:16 -0500 Subject: [PATCH 506/546] add "running Dataverse in docker", other cleanup #10238 --- doc/sphinx-guides/source/container/index.rst | 20 ++------------ doc/sphinx-guides/source/container/intro.rst | 26 ++++++++++++++++++ .../source/container/running/backend-dev.rst | 7 +++++ .../source/container/running/demo.rst | 27 +++++++++++++++++++ .../source/container/running/frontend-dev.rst | 7 +++++ .../source/container/running/index.rst | 12 +++++++++ .../container/running/metadata-blocks.rst | 9 +++++++ .../source/container/running/production.rst | 11 ++++++++ docker/compose/demo/compose.yml | 0 9 files changed, 101 insertions(+), 18 deletions(-) create mode 100644 doc/sphinx-guides/source/container/intro.rst create mode 100644 doc/sphinx-guides/source/container/running/backend-dev.rst create mode 100644 
doc/sphinx-guides/source/container/running/demo.rst create mode 100644 doc/sphinx-guides/source/container/running/frontend-dev.rst create mode 100755 doc/sphinx-guides/source/container/running/index.rst create mode 100644 doc/sphinx-guides/source/container/running/metadata-blocks.rst create mode 100644 doc/sphinx-guides/source/container/running/production.rst create mode 100644 docker/compose/demo/compose.yml diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 4bbc87a4845..abf871dd340 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -1,28 +1,12 @@ Container Guide =============== -Running the Dataverse software in containers is quite different than in a :doc:`standard installation <../installation/prep>`. - -Both approaches have pros and cons. These days, containers are very often used for development and testing, -but there is an ever rising move toward running applications in the cloud using container technology. - -**NOTE:** -**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, -container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort -basis.** - -This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other -solutions to run containers in production. There is the `Dataverse on K8s project `_ for this -purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide. - -This guide focuses on describing the container images managed from the main Dataverse repository (again: by the -community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to -develop and extend them further are provided. - **Contents:** .. toctree:: + intro + running/index dev-usage base-image app-image diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst new file mode 100644 index 00000000000..94b2c99f0d1 --- /dev/null +++ b/doc/sphinx-guides/source/container/intro.rst @@ -0,0 +1,26 @@ +Introduction +============ + +Dataverse in containers! + +.. contents:: |toctitle| + :local: + +Intended Audience +----------------- + +This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). + +.. _getting-help-containers: + +Getting Help +------------ + +Please ask in #containers at https://chat.dataverse.org + +.. _helping-containers: + +Helping with the Containerization Effort +---------------------------------------- + +In 2023 the Containerization Working Group started meeting regularly. All are welcome to join! We talk in #containers at https://chat.dataverse.org and have a regular video call. For details, please visit https://ct.gdcc.io diff --git a/doc/sphinx-guides/source/container/running/backend-dev.rst b/doc/sphinx-guides/source/container/running/backend-dev.rst new file mode 100644 index 00000000000..45aa4450bfb --- /dev/null +++ b/doc/sphinx-guides/source/container/running/backend-dev.rst @@ -0,0 +1,7 @@ +Backend Development +=================== + +.. contents:: |toctitle| + :local: + +See :doc:`../dev-usage`. 
diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst new file mode 100644 index 00000000000..71e45f5028e --- /dev/null +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -0,0 +1,27 @@ +Demo or Evaluation +================== + +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. + +.. contents:: |toctitle| + :local: + +Hardware Requirements +--------------------- + +- 8 GB RAM + +Software Requirements +--------------------- + +- Mac, Linux, or Windows (experimental) +- Docker + +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs and see :ref:`helping-containers`. + +Steps +----- + +- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` +- Run ``docker compose up`` in the directory where you put ``compose.yml`` + diff --git a/doc/sphinx-guides/source/container/running/frontend-dev.rst b/doc/sphinx-guides/source/container/running/frontend-dev.rst new file mode 100644 index 00000000000..1f57d4531ba --- /dev/null +++ b/doc/sphinx-guides/source/container/running/frontend-dev.rst @@ -0,0 +1,7 @@ +Frontend Development +==================== + +.. contents:: |toctitle| + :local: + +https://github.com/IQSS/dataverse-frontend includes docs and scripts for running Dataverse in Docker for frontend development. diff --git a/doc/sphinx-guides/source/container/running/index.rst b/doc/sphinx-guides/source/container/running/index.rst new file mode 100755 index 00000000000..8d17b105eb4 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/index.rst @@ -0,0 +1,12 @@ +Running Dataverse in Docker +=========================== + +Contents: + +.. toctree:: + + production + demo + metadata-blocks + frontend-dev + backend-dev diff --git a/doc/sphinx-guides/source/container/running/metadata-blocks.rst b/doc/sphinx-guides/source/container/running/metadata-blocks.rst new file mode 100644 index 00000000000..4794f29ab42 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/metadata-blocks.rst @@ -0,0 +1,9 @@ +Editing Metadata Blocks +======================= + +.. contents:: |toctitle| + :local: + +The Admin Guide has a section on :doc:`/admin/metadatacustomization` and suggests running Dataverse in containers (Docker) for this purpose. + +This is certainly possible but the specifics have not yet been written. Until then, please see :doc:`demo`, which should also provide a suitable environment. diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst new file mode 100644 index 00000000000..89e63ff5ab1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -0,0 +1,11 @@ +Production (Future) +=================== + +.. contents:: |toctitle| + :local: + +The images described in this guide not yet recommended for production usage. + +You can help the effort to support these images in production by trying them out and giving feedback (see :ref:`helping-containers`). + +For now, please follow :doc:`demo`. 
diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml new file mode 100644 index 00000000000..e69de29bb2d From fb58d895edac32744cae7b164d7ae9f1121dba94 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 10:58:07 -0500 Subject: [PATCH 507/546] tweaks and more use cases #10238 --- doc/sphinx-guides/source/container/intro.rst | 2 +- .../source/container/running/backend-dev.rst | 3 +++ .../source/container/running/demo.rst | 4 ++-- .../source/container/running/frontend-dev.rst | 5 ++++- .../source/container/running/github-action.rst | 18 ++++++++++++++++++ .../source/container/running/index.rst | 1 + .../container/running/metadata-blocks.rst | 8 +++++++- .../source/container/running/production.rst | 15 ++++++++++++--- 8 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 doc/sphinx-guides/source/container/running/github-action.rst diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst index 94b2c99f0d1..42b095f3158 100644 --- a/doc/sphinx-guides/source/container/intro.rst +++ b/doc/sphinx-guides/source/container/intro.rst @@ -9,7 +9,7 @@ Dataverse in containers! Intended Audience ----------------- -This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). +This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). See :doc:`running/index` for various scenarios and please let us know if your use case is not covered. .. _getting-help-containers: diff --git a/doc/sphinx-guides/source/container/running/backend-dev.rst b/doc/sphinx-guides/source/container/running/backend-dev.rst index 45aa4450bfb..8b2dab956ad 100644 --- a/doc/sphinx-guides/source/container/running/backend-dev.rst +++ b/doc/sphinx-guides/source/container/running/backend-dev.rst @@ -4,4 +4,7 @@ Backend Development .. contents:: |toctitle| :local: +Intro +----- + See :doc:`../dev-usage`. diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 71e45f5028e..8db8cfb2a9c 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,7 +1,7 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, pease see :ref:`helping-containers`. .. contents:: |toctitle| :local: @@ -17,7 +17,7 @@ Software Requirements - Mac, Linux, or Windows (experimental) - Docker -Windows support is experimental but we are very interested in supporting Windows better. Please report bugs and see :ref:`helping-containers`. +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). 
Steps ----- diff --git a/doc/sphinx-guides/source/container/running/frontend-dev.rst b/doc/sphinx-guides/source/container/running/frontend-dev.rst index 1f57d4531ba..88d40c12053 100644 --- a/doc/sphinx-guides/source/container/running/frontend-dev.rst +++ b/doc/sphinx-guides/source/container/running/frontend-dev.rst @@ -4,4 +4,7 @@ Frontend Development .. contents:: |toctitle| :local: -https://github.com/IQSS/dataverse-frontend includes docs and scripts for running Dataverse in Docker for frontend development. +Intro +----- + +The frontend (web interface) of Dataverse is being decoupled from the backend. This evolving codebase has its own repo at https://github.com/IQSS/dataverse-frontend which includes docs and scripts for running the backend of Dataverse in Docker. diff --git a/doc/sphinx-guides/source/container/running/github-action.rst b/doc/sphinx-guides/source/container/running/github-action.rst new file mode 100644 index 00000000000..ae42dd494d1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/github-action.rst @@ -0,0 +1,18 @@ +GitHub Action +============= + +.. contents:: |toctitle| + :local: + +Intro +----- + +A GitHub Action is under development that will spin up a Dataverse instance within the context of GitHub CI workflows: https://github.com/gdcc/dataverse-action + +Use Cases +--------- + +Use cases for the GitHub Action include: + +- Testing :doc:`/api/client-libraries` that interact with Dataverse APIs +- Testing :doc:`/admin/integrations` of third party software with Dataverse diff --git a/doc/sphinx-guides/source/container/running/index.rst b/doc/sphinx-guides/source/container/running/index.rst index 8d17b105eb4..a02266f7cba 100755 --- a/doc/sphinx-guides/source/container/running/index.rst +++ b/doc/sphinx-guides/source/container/running/index.rst @@ -8,5 +8,6 @@ Contents: production demo metadata-blocks + github-action frontend-dev backend-dev diff --git a/doc/sphinx-guides/source/container/running/metadata-blocks.rst b/doc/sphinx-guides/source/container/running/metadata-blocks.rst index 4794f29ab42..fcc80ce1909 100644 --- a/doc/sphinx-guides/source/container/running/metadata-blocks.rst +++ b/doc/sphinx-guides/source/container/running/metadata-blocks.rst @@ -4,6 +4,12 @@ Editing Metadata Blocks .. contents:: |toctitle| :local: +Intro +----- + The Admin Guide has a section on :doc:`/admin/metadatacustomization` and suggests running Dataverse in containers (Docker) for this purpose. -This is certainly possible but the specifics have not yet been written. Until then, please see :doc:`demo`, which should also provide a suitable environment. +Status +------ + +For now, please see :doc:`demo`, which should also provide a suitable Dockerized Dataverse environment. diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst index 89e63ff5ab1..0a628dc57b9 100644 --- a/doc/sphinx-guides/source/container/running/production.rst +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -4,8 +4,17 @@ Production (Future) .. contents:: |toctitle| :local: -The images described in this guide not yet recommended for production usage. +Status +------ -You can help the effort to support these images in production by trying them out and giving feedback (see :ref:`helping-containers`). +The images described in this guide are not yet recommended for production usage. -For now, please follow :doc:`demo`. 
+How to Help +----------- + +You can help the effort to support these images in production by trying them out (see :doc:`demo`) and giving feedback (see :ref:`helping-containers`). + +Alternatives +------------ + +Until the images are ready for production, please use the traditional installation method described in the :doc:`/installation/index`. From b7ec6465b09e41929f985089c2a5c566e95308e4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 26 Jan 2024 11:12:50 -0500 Subject: [PATCH 508/546] #9748 delete tools only added by tests --- .../iq/dataverse/api/ExternalToolsIT.java | 102 +++++++----------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 022747a3cdc..664c07d598c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -40,21 +40,6 @@ public void testGetExternalTools() { @Test public void testFileLevelTool1() { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -145,26 +130,14 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) // No tools for this file type. .body("data", Matchers.hasSize(0)); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); } @Test public void testDatasetLevelTool1() { - // Delete all external tools before testing. 
- Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -184,7 +157,6 @@ public void testDatasetLevelTool1() { createDataset.then().assertThat() .statusCode(CREATED.getStatusCode()); -// Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); Integer datasetId = JsonPath.from(createDataset.getBody().asString()).getInt("data.id"); String datasetPid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); @@ -219,6 +191,8 @@ public void testDatasetLevelTool1() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("DatasetTool1")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getExternalToolsByDatasetIdInvalidType = UtilIT.getExternalToolsForDataset(datasetId.toString(), "invalidType", apiToken); getExternalToolsByDatasetIdInvalidType.prettyPrint(); @@ -233,27 +207,16 @@ public void testDatasetLevelTool1() { .body("data[0].scope", CoreMatchers.equalTo("dataset")) .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://datasettool1.com?datasetPid=" + datasetPid + "&key=" + apiToken)) .statusCode(OK.getStatusCode()); - + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test public void testDatasetLevelToolConfigure() { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -302,6 +265,8 @@ public void testDatasetLevelToolConfigure() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("Dataset Configurator")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getExternalToolsByDatasetId = UtilIT.getExternalToolsForDataset(datasetId.toString(), "configure", apiToken); getExternalToolsByDatasetId.prettyPrint(); @@ -311,6 +276,11 @@ public void testDatasetLevelToolConfigure() { .body("data[0].types[0]", CoreMatchers.equalTo("configure")) .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("https://datasetconfigurator.com?datasetPid=" + datasetPid)) .statusCode(OK.getStatusCode()); + + //Delete the tool added by this test... 
+ Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @@ -400,12 +370,13 @@ public void deleteTools() { String body = getTools.getBody().asString(); JsonReader bodyObject = Json.createReader(new StringReader(body)); JsonArray tools = bodyObject.readObject().getJsonArray("data"); + /* for (int i = 0; i < tools.size(); i++) { JsonObject tool = tools.getJsonObject(i); int id = tool.getInt("id"); Response deleteExternalTool = UtilIT.deleteExternalTool(id); deleteExternalTool.prettyPrint(); - } + }*/ } // preview only @@ -446,6 +417,13 @@ public void createToolShellScript() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } // explore only @@ -479,6 +457,13 @@ public void createToolDataExplorer() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } // both preview and explore @@ -527,21 +512,6 @@ public void createToolSpreadsheetViewer() { @Test public void testFileLevelToolWithAuxFileReq() throws IOException { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -640,6 +610,12 @@ public void testFileLevelToolWithAuxFileReq() throws IOException { .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) .body("data[0].scope", CoreMatchers.equalTo("file")) .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); + } } From cc29efecd2748ad005760610c6be65ba073b35c6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 11:30:19 -0500 Subject: [PATCH 509/546] stub out demo/eval compose.yml based on dev compose #10238 Differences from dev version: - localstack and minio removed - env vars filled in based on current .env The goal is to have a single file to download, rather than a compose file and an .env file. 
--- docker/compose/demo/compose.yml | 170 ++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index e69de29bb2d..aea99040acd 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -0,0 +1,170 @@ +version: "2.4" + +services: + + dev_dataverse: + container_name: "dev_dataverse" + hostname: dataverse + image: gdcc/dataverse:unstable + restart: on-failure + user: payara + environment: + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: dataverse + ENABLE_JDWP: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_JSF_REFRESH_PERIOD: "1" + # These two oai settings are here to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: "2" + dataverse_oai_server_maxrecords: "2" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + ports: + - "8080:8080" # HTTP (Dataverse Application) + - "4848:4848" # HTTP (Payara Admin Console) + - "9009:9009" # JDWP + - "8686:8686" # JMX + networks: + - dataverse + depends_on: + - dev_postgres + - dev_solr + - dev_dv_initializer + volumes: + - ./docker-dev-volumes/app/data:/dv + - ./docker-dev-volumes/app/secrets:/secrets + # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications + # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). + # Replace 6.0 with the current version. 
+ # - ./target/dataverse-6.0:/opt/payara/deployments/dataverse + tmpfs: + - /dumps:mode=770,size=2052M,uid=1000,gid=1000 + - /tmp:mode=770,size=2052M,uid=1000,gid=1000 + mem_limit: 2147483648 # 2 GiB + mem_reservation: 1024m + privileged: false + + dev_bootstrap: + container_name: "dev_bootstrap" + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - dev + networks: + - dataverse + + dev_dv_initializer: + container_name: "dev_dv_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh dv" + volumes: + - ./docker-dev-volumes/app/data:/dv + + dev_postgres: + container_name: "dev_postgres" + hostname: postgres + image: postgres:13 + restart: on-failure + environment: + - POSTGRES_USER=dataverse + - POSTGRES_PASSWORD=secret + ports: + - "5432:5432" + networks: + - dataverse + volumes: + - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data + + dev_solr_initializer: + container_name: "dev_solr_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/solr-template + + dev_solr: + container_name: "dev_solr" + hostname: "solr" + image: solr:9.3.0 + depends_on: + - dev_solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/template + + dev_smtp: + container_name: "dev_smtp" + hostname: "smtp" + image: maildev/maildev:2.0.5 + restart: on-failure + ports: + - "25:25" # smtp server + - "1080:1080" # web ui + environment: + - MAILDEV_SMTP_PORT=25 + - MAILDEV_MAIL_DIRECTORY=/mail + networks: + - dataverse + #volumes: + # - ./docker-dev-volumes/smtp/data:/mail + tmpfs: + - /mail:mode=770,size=128M,uid=1000,gid=1000 + + dev_keycloak: + container_name: "dev_keycloak" + image: 'quay.io/keycloak/keycloak:21.0' + hostname: keycloak + environment: + - KEYCLOAK_ADMIN=kcadmin + - KEYCLOAK_ADMIN_PASSWORD=kcpassword + - KEYCLOAK_LOGLEVEL=DEBUG + - KC_HOSTNAME_STRICT=false + networks: + dataverse: + aliases: + - keycloak.mydomain.com #create a DNS alias within the network (add the same alias to your /etc/hosts to get a working OIDC flow) + command: start-dev --import-realm --http-port=8090 # change port to 8090, so within the network and external the same port is used + ports: + - "8090:8090" + volumes: + - './conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json' + + dev_nginx: + container_name: dev_nginx + image: gdcc/dev_nginx:unstable + ports: + - "4849:4849" + restart: always + networks: + - dataverse + +networks: + dataverse: + driver: bridge From 0c736cc698a3fef25fa8d5f25e76d4a85a6ec088 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 12:47:38 -0500 Subject: [PATCH 510/546] switch from unstable to alpha images #10238 --- docker/compose/demo/compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index aea99040acd..403143130ac 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -5,7 +5,7 @@ services: dev_dataverse: container_name: "dev_dataverse" hostname: dataverse - image: gdcc/dataverse:unstable + image: gdcc/dataverse:alpha restart: on-failure user: payara environment: @@ -54,7 
+54,7 @@ services: dev_bootstrap: container_name: "dev_bootstrap" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - bootstrap.sh @@ -64,7 +64,7 @@ services: dev_dv_initializer: container_name: "dev_dv_initializer" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - sh @@ -90,7 +90,7 @@ services: dev_solr_initializer: container_name: "dev_solr_initializer" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - sh From 91287b35960afd0d351d1b07942333763ce84555 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 26 Jan 2024 15:55:12 -0500 Subject: [PATCH 511/546] #9748 one more assert --- src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 664c07d598c..6f0aa499dd1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -133,6 +133,8 @@ public void testFileLevelTool1() { //Delete the tool added by this test... Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From 69d3bb9172ad134c32299a326ef76efda2420458 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:21:58 -0500 Subject: [PATCH 512/546] more content for demo/eval #10238 Also update tags section under "app image" (now live). --- .../source/container/app-image.rst | 18 +-- doc/sphinx-guides/source/container/intro.rst | 2 + .../source/container/running/demo.rst | 125 ++++++++++++++++-- 3 files changed, 126 insertions(+), 19 deletions(-) diff --git a/doc/sphinx-guides/source/container/app-image.rst b/doc/sphinx-guides/source/container/app-image.rst index 29f6d6ac1d4..caf4aadbf7e 100644 --- a/doc/sphinx-guides/source/container/app-image.rst +++ b/doc/sphinx-guides/source/container/app-image.rst @@ -22,20 +22,20 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. - +.. _supported-image-tags-app: Supported Image Tags ++++++++++++++++++++ This image is sourced from the main upstream code `repository of the Dataverse software `_. -Development and maintenance of the `image's code `_ happens there -(again, by the community). - -.. note:: - Please note that this image is not (yet) available from Docker Hub. You need to build local to use - (see below). Follow https://github.com/IQSS/dataverse/issues/9444 for new developments. - - +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. 
+ (`Dockerfile `__) Image Contents ++++++++++++++ diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst index 42b095f3158..5099531dcc9 100644 --- a/doc/sphinx-guides/source/container/intro.rst +++ b/doc/sphinx-guides/source/container/intro.rst @@ -18,6 +18,8 @@ Getting Help Please ask in #containers at https://chat.dataverse.org +Alternatively, you can try one or more of the channels under :ref:`support`. + .. _helping-containers: Helping with the Containerization Effort diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 8db8cfb2a9c..0ad1e50442f 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,27 +1,132 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, pease see :ref:`helping-containers`. +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. .. contents:: |toctitle| :local: -Hardware Requirements ---------------------- +Quickstart +---------- -- 8 GB RAM +- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` +- Run ``docker compose up`` in the directory where you put ``compose.yml`` +- Visit http://localhost:8080 and try logging in: + + - username: dataverseAdmin + - password: admin1 -Software Requirements ---------------------- +Hardware and Software Requirements +----------------------------------- +- 8 GB RAM (if not much else is running) - Mac, Linux, or Windows (experimental) - Docker Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). -Steps ------ +Tags and Versions +----------------- -- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` -- Run ``docker compose up`` in the directory where you put ``compose.yml`` +The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. + +Once Dataverse is running, you can check which version you have through the normal methods: + +- Check the bottom right in a web browser. +- Check http://localhost:8080/api/info/version via API. + +About the Containers +-------------------- + +If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: + +- dataverse +- postgres +- solr +- smtp +- bootstrap + +Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. + +Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. 
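+
+If you would like to script the version check mentioned under Tags and Versions above, the following is a minimal, illustrative Java sketch. It is not part of Dataverse itself (a browser or any plain HTTP client works just as well); it only assumes Java 11 or later and a Dataverse container reachable on ``localhost:8080``, and the endpoint path is the one given earlier in this section.
+
+.. code-block:: java
+
+    import java.net.URI;
+    import java.net.http.HttpClient;
+    import java.net.http.HttpRequest;
+    import java.net.http.HttpResponse;
+
+    public class VersionCheck {
+        public static void main(String[] args) throws Exception {
+            // Ask the running "dataverse" container which version it is serving.
+            HttpClient client = HttpClient.newHttpClient();
+            HttpRequest request = HttpRequest.newBuilder(URI.create("http://localhost:8080/api/info/version"))
+                    .GET()
+                    .build();
+            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
+            // The body is a small JSON document that includes the version string.
+            System.out.println(response.body());
+        }
+    }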
+ +Security +-------- + +Please be aware that for now, the "dev" persona is used to bootstrap Dataverse, which means that admin APIs are wide open (to allow developers to test them; see :ref:`securing-your-installation` for more on API blocking), the "create user" key is set to a default value, etc. You can inspect the dev person `on GitHub `_ (look for ``--insecure``). + +We plan to ship a "demo" persona but it is not ready yet. See also :ref:`configbaker-personas`. + +Common Operations +----------------- + +Starting the Containers ++++++++++++++++++++++++ + +First, download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` and place it somewhere you'll remember. + +Then, run ``docker compose up`` in the directory where you put ``compose.yml`` + +Starting the containers for the first time involves a bootstrap process. You should see "have a nice day" output at the end. + +Stopping the Containers ++++++++++++++++++++++++ + +You might want to stop the containers if you aren't using them. Hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). + +You data is still intact and you can start the containers again with ``docker compose up``. + +Deleting the Containers ++++++++++++++++++++++++ + +If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. + +Deleting the Data Directory ++++++++++++++++++++++++++++ + +Data related to the Dataverse containers is placed in a directory called ``docker-dev-volumes`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. + +Configuration +------------- + +Configuration is described in greater detail under :doc:`/installation/config` in the Installation Guide, but there are some specifics to running in containers you should know about. + +.. _configbaker-personas: + +Personas +++++++++ + +When the containers are bootstrapped, the "dev" persona is used. In the future we plan to add a "demo" persona that is more suited to demo and evaluation use cases. + +Database Settings ++++++++++++++++++ + +Updating database settings is the same as described under :ref:`database-settings` in the Installation Guide. + +MPCONFIG Options +++++++++++++++++ + +The compose file contains an ``environment`` section with various MicroProfile Config (MPCONFIG) options. You can experiment with this by adding ``DATAVERSE_VERSION: foobar`` to change the (displayed) version of Dataverse to "foobar". + +JVM Options ++++++++++++ + +JVM options are not especially easy to change in the container. The general process is to get a shell on the "dataverse" container, change the settings, and then stop and start the containers. See :ref:`jvm-options` for more. + +Troubleshooting +--------------- + +Bootstrapping Did Not Complete +++++++++++++++++++++++++++++++ + +In the compose file, try increasing the timeout in the bootstrap container by adding something like this: + +.. code-block:: bash + + environment: + - TIMEOUT=10m + +Getting Help +------------ +Please do not be shy about reaching out for help. We very much want you to have a pleasant demo or evaluation experience. For ways to contact us, please see See :ref:`getting-help-containers`. 
From d3a378de0815a8d9af94fe8972f61d95841f89f2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:23:20 -0500 Subject: [PATCH 513/546] remove limits used for harvesting tests #10238 --- docker/compose/demo/compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 403143130ac..4cfd8cd9345 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -19,9 +19,6 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" - # These two oai settings are here to get HarvestingServerIT to pass - dataverse_oai_server_maxidentifiers: "2" - dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem From 4555ae3f9dae12fd83c369b846c4aff114fecbf0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:25:59 -0500 Subject: [PATCH 514/546] remove keycloak container and OIDC config #10238 --- docker/compose/demo/compose.yml | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 4cfd8cd9345..e0839eb1023 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -14,10 +14,6 @@ services: DATAVERSE_DB_USER: dataverse ENABLE_JDWP: "1" DATAVERSE_FEATURE_API_BEARER_AUTH: "1" - DATAVERSE_AUTH_OIDC_ENABLED: "1" - DATAVERSE_AUTH_OIDC_CLIENT_ID: test - DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 - DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file @@ -134,25 +130,6 @@ services: tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 - dev_keycloak: - container_name: "dev_keycloak" - image: 'quay.io/keycloak/keycloak:21.0' - hostname: keycloak - environment: - - KEYCLOAK_ADMIN=kcadmin - - KEYCLOAK_ADMIN_PASSWORD=kcpassword - - KEYCLOAK_LOGLEVEL=DEBUG - - KC_HOSTNAME_STRICT=false - networks: - dataverse: - aliases: - - keycloak.mydomain.com #create a DNS alias within the network (add the same alias to your /etc/hosts to get a working OIDC flow) - command: start-dev --import-realm --http-port=8090 # change port to 8090, so within the network and external the same port is used - ports: - - "8090:8090" - volumes: - - './conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json' - dev_nginx: container_name: dev_nginx image: gdcc/dev_nginx:unstable From bb4d78649338ced4f66ec4ba4167c6a94efcd23f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:29:33 -0500 Subject: [PATCH 515/546] remove various dev stuff not needed for a demo #10238 --- docker/compose/demo/compose.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index e0839eb1023..b72d06951e8 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -12,9 +12,7 @@ services: DATAVERSE_DB_HOST: postgres DATAVERSE_DB_PASSWORD: secret DATAVERSE_DB_USER: dataverse - ENABLE_JDWP: "1" DATAVERSE_FEATURE_API_BEARER_AUTH: "1" - DATAVERSE_JSF_REFRESH_PERIOD: "1" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file 
-Ddataverse.files.file1.label=Filesystem @@ -33,11 +31,6 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets - # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh - # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications - # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). - # Replace 6.0 with the current version. - # - ./target/dataverse-6.0:/opt/payara/deployments/dataverse tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 @@ -130,15 +123,6 @@ services: tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 - dev_nginx: - container_name: dev_nginx - image: gdcc/dev_nginx:unstable - ports: - - "4849:4849" - restart: always - networks: - - dataverse - networks: dataverse: driver: bridge From c5f4ca46b6d384965c80926bce199f64f80d1af3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 29 Jan 2024 10:33:34 -0500 Subject: [PATCH 516/546] remove "dev_" from container names #10238 --- docker/compose/demo/compose.yml | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index b72d06951e8..09dde63d5f4 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -2,8 +2,8 @@ version: "2.4" services: - dev_dataverse: - container_name: "dev_dataverse" + dataverse: + container_name: "dataverse" hostname: dataverse image: gdcc/dataverse:alpha restart: on-failure @@ -25,9 +25,9 @@ services: networks: - dataverse depends_on: - - dev_postgres - - dev_solr - - dev_dv_initializer + - postgres + - solr + - dv_initializer volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets @@ -38,8 +38,8 @@ services: mem_reservation: 1024m privileged: false - dev_bootstrap: - container_name: "dev_bootstrap" + bootstrap: + container_name: "bootstrap" image: gdcc/configbaker:alpha restart: "no" command: @@ -48,8 +48,8 @@ services: networks: - dataverse - dev_dv_initializer: - container_name: "dev_dv_initializer" + dv_initializer: + container_name: "dv_initializer" image: gdcc/configbaker:alpha restart: "no" command: @@ -59,8 +59,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - dev_postgres: - container_name: "dev_postgres" + postgres: + container_name: "postgres" hostname: postgres image: postgres:13 restart: on-failure @@ -74,8 +74,8 @@ services: volumes: - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data - dev_solr_initializer: - container_name: "dev_solr_initializer" + solr_initializer: + container_name: "solr_initializer" image: gdcc/configbaker:alpha restart: "no" command: @@ -86,12 +86,12 @@ services: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/solr-template - dev_solr: - container_name: "dev_solr" + solr: + container_name: "solr" hostname: "solr" image: solr:9.3.0 depends_on: - - dev_solr_initializer + - solr_initializer restart: on-failure ports: - "8983:8983" @@ -105,8 +105,8 @@ services: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/template - dev_smtp: - container_name: "dev_smtp" + smtp: + container_name: "smtp" hostname: "smtp" image: maildev/maildev:2.0.5 restart: on-failure From c0cda028c3ce0922f51c670917d94ef22cab61c5 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 29 Jan 
2024 10:39:14 -0500 Subject: [PATCH 517/546] rename docker-dev-volumes to data #10238 --- .../source/container/running/demo.rst | 2 +- docker/compose/demo/.gitignore | 1 + docker/compose/demo/compose.yml | 18 +++++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 docker/compose/demo/.gitignore diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 0ad1e50442f..5eda108c842 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -84,7 +84,7 @@ If you no longer need the containers because your demo or evaluation is finished Deleting the Data Directory +++++++++++++++++++++++++++ -Data related to the Dataverse containers is placed in a directory called ``docker-dev-volumes`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. +Data related to the Dataverse containers is placed in a directory called ``data`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. Configuration ------------- diff --git a/docker/compose/demo/.gitignore b/docker/compose/demo/.gitignore new file mode 100644 index 00000000000..1269488f7fb --- /dev/null +++ b/docker/compose/demo/.gitignore @@ -0,0 +1 @@ +data diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 09dde63d5f4..3817921f10a 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -29,8 +29,8 @@ services: - solr - dv_initializer volumes: - - ./docker-dev-volumes/app/data:/dv - - ./docker-dev-volumes/app/secrets:/secrets + - ./data/app/data:/dv + - ./data/app/secrets:/secrets tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 @@ -57,7 +57,7 @@ services: - -c - "fix-fs-perms.sh dv" volumes: - - ./docker-dev-volumes/app/data:/dv + - ./data/app/data:/dv postgres: container_name: "postgres" @@ -72,7 +72,7 @@ services: networks: - dataverse volumes: - - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data + - ./data/postgresql/data:/var/lib/postgresql/data solr_initializer: container_name: "solr_initializer" @@ -83,8 +83,8 @@ services: - -c - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" volumes: - - ./docker-dev-volumes/solr/data:/var/solr - - ./docker-dev-volumes/solr/conf:/solr-template + - ./data/solr/data:/var/solr + - ./data/solr/conf:/solr-template solr: container_name: "solr" @@ -102,8 +102,8 @@ services: - "collection1" - "/template" volumes: - - ./docker-dev-volumes/solr/data:/var/solr - - ./docker-dev-volumes/solr/conf:/template + - ./data/solr/data:/var/solr + - ./data/solr/conf:/template smtp: container_name: "smtp" @@ -119,7 +119,7 @@ services: networks: - dataverse #volumes: - # - ./docker-dev-volumes/smtp/data:/mail + # - ./data/smtp/data:/mail tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 From d275a6343c0b7d0b296e8dc2d3c158afdd980058 Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 29 Jan 2024 10:42:23 -0500 Subject: [PATCH 518/546] Add TurboCurator to External Tools list --- .../source/_static/admin/dataverse-external-tools.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 
4f4c29d0670..a20ab864d2a 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,3 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." From 1ea4db3f3c011dc8ea28d9eb656e423fdccfccd9 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 30 Jan 2024 13:02:34 -0500 Subject: [PATCH 519/546] a checklist for making a core field allowMultiples for the dev. guide #9634 --- doc/sphinx-guides/source/developers/index.rst | 1 + .../source/developers/metadatablocksdev.rst | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/metadatablocksdev.rst diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 25fea138736..25007baf589 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -31,6 +31,7 @@ Developer Guide making-releases making-library-releases metadataexport + metadatablocksdev tools unf/index make-data-count diff --git a/doc/sphinx-guides/source/developers/metadatablocksdev.rst b/doc/sphinx-guides/source/developers/metadatablocksdev.rst new file mode 100644 index 00000000000..17093471467 --- /dev/null +++ b/doc/sphinx-guides/source/developers/metadatablocksdev.rst @@ -0,0 +1,26 @@ +=========================== +Metadata Blocks Development +=========================== + +.. contents:: |toctitle| + :local: + +Introduction +------------ + +The idea behind Metadata Blocks in Dataverse is to have everything about the supported metadata fields configurable and customizable. Ideally, this should be accomplished by simply re-importing the updated tsv for the block via the API. 
In practice, when it comes to the core blocks that are distributed with Dataverse - such as the Citation and Social Science blocks - unfortunately, many dependencies exist in various parts of Dataverse, primarily import and export subsystems, on many specific fields being configured a certain way. This means that code changes may be required whenever a field from one of these core blocks is modified. + +Making a Field Multiple +----------------------- + +Back in 2023, in order to accommodate specific needs of some community member institutions a few fields from Citation and Social Science were changed to support multiple values. (For example, the ``alternativeTitle`` field from the Citation block.) A number of code changes had to be made to accommodate this, plus a number of changes in the sample metadata files that are maintained in the Dataverse code tree. The checklist below is to help another developer should a similar change become necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. + +- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block (obviously). +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Modify the DDI export utility (``DdiExportUtil.java``). +- Modify the OpenAire export utility (``OpenAireExportUtil.java``). +- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). +- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. +- Make sure all the automated Unit and Integration tests are passing. +- Write a short release note to announce the change in the upcoming release. 
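The checklist above assumes that, once the TSV and the Solr schema are edited, the updated block is re-imported via the API as described in the introduction. A minimal sketch of that step, assuming a local installation listening on localhost:8080 and the citation block edited in place:

.. code-block:: bash

    # Sketch only: reload the edited block TSV through the admin API
    curl http://localhost:8080/api/admin/datasetfield/load -X POST \
         -H "Content-type: text/tab-separated-values" \
         --upload-file scripts/api/data/metadatablocks/citation.tsv

    # After the updated Solr schema is deployed, start a full reindex
    curl http://localhost:8080/api/admin/index
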
From 2eeda3d910ed128176c75b290c651252722dd919 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 30 Jan 2024 13:08:58 -0500 Subject: [PATCH 520/546] add sleep to SwordIT per qqmyers --- src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java index 39156f1c59b..4df6c89411d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java @@ -855,7 +855,7 @@ public void testDeleteFiles() { List oneFileLeftInV2Draft = statement3.getBody().xmlPath().getList("feed.entry.id"); logger.info("Number of files remaining in this post version 1 draft:" + oneFileLeftInV2Draft.size()); assertEquals(1, oneFileLeftInV2Draft.size()); - + UtilIT.sleepForLock(datasetPersistentId, "EditInProgress", apiToken, UtilIT.MAXIMUM_PUBLISH_LOCK_DURATION); Response deleteIndex1b = UtilIT.deleteFile(Integer.parseInt(index1b), apiToken); deleteIndex1b.then().assertThat() .statusCode(NO_CONTENT.getStatusCode()); From e4776101e8507a4b470b58ec70e90046516e4fa4 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 30 Jan 2024 13:16:11 -0500 Subject: [PATCH 521/546] linked the dev. checklist in the metadata customization section of the admin guide. #9634 --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 4f737bd730b..36956567a7d 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -648,6 +648,11 @@ Alternatively, you are welcome to request "edit" access to this "Tips for Datave The thinking is that the tips can become issues and the issues can eventually be worked on as features to improve the Dataverse Software metadata system. +Development Tasks Specific to Changing Fields in Core Metadata Blocks +--------------------------------------------------------------------- + +When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation and Social Science blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields (a real life example is making a single value-only field support multiple values), code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. An example of a checklist of such tasks is provided in the Development Guide, please see the :doc:`/developers/metadatablocksdev` section. 
+ Footnotes --------- From d960b980f926ba3e1d8ed0336ef3d541ddc6fb50 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 30 Jan 2024 16:01:55 -0500 Subject: [PATCH 522/546] #9748 comment out disabled test --- src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 6f0aa499dd1..2c96ce96dea 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -432,6 +432,7 @@ public void createToolShellScript() { @Disabled @Test public void createToolDataExplorer() { + /* JsonObjectBuilder job = Json.createObjectBuilder(); job.add("displayName", "Data Explorer"); job.add("description", ""); @@ -466,6 +467,7 @@ public void createToolDataExplorer() { Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); deleteExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + */ } // both preview and explore From 9b0a3cf2f0c5a6337aaed925ff640651fecf6116 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 30 Jan 2024 16:50:07 -0500 Subject: [PATCH 523/546] rewrite demo page as a tutorial #10238 Also, explain how to create a persona and some basic config. --- .../source/container/running/demo.rst | 169 +++++++++++------- docker/compose/demo/compose.yml | 4 + .../scripts/bootstrap/demo/init.sh | 13 ++ 3 files changed, 126 insertions(+), 60 deletions(-) create mode 100644 modules/container-configbaker/scripts/bootstrap/demo/init.sh diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 5eda108c842..4e2a9db3f48 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,7 +1,7 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. +In the following tutorial we'll walk through spinning up Dataverse in containers for demo or evaluation purposes. .. contents:: |toctitle| :local: @@ -9,6 +9,8 @@ If you would like to demo or evaluate Dataverse running in containers, you're in Quickstart ---------- +First, let's confirm that we can get Dataverse running on your system. + - Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` - Run ``docker compose up`` in the directory where you put ``compose.yml`` - Visit http://localhost:8080 and try logging in: @@ -16,106 +18,138 @@ Quickstart - username: dataverseAdmin - password: admin1 -Hardware and Software Requirements ------------------------------------ +If you can log in, great! Please continue through the tutorial. If you have any trouble, please consult the sections below on troubleshooting and getting help. -- 8 GB RAM (if not much else is running) -- Mac, Linux, or Windows (experimental) -- Docker +Stopping and Starting the Containers +------------------------------------ -Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). +Let's practice stopping the containers and starting them up again. 
Your data, stored in a directory called ``data``, will remain intact -Tags and Versions ------------------ +To stop the containers hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). -The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. +To start the containers, run ``docker compose up``. -Once Dataverse is running, you can check which version you have through the normal methods: +Deleting Data and Starting Over +------------------------------- -- Check the bottom right in a web browser. -- Check http://localhost:8080/api/info/version via API. +Again, data related to your Dataverse installation such as the database is stored in a directory called ``data`` that gets created in the directory where you ran ``docker compose`` commands. -About the Containers --------------------- +You may reach a point during your demo or evaluation that you'd like to start over with a fresh database. Simply make sure the containers are not running and then remove the ``data`` directory. Now, as before, you can run ``docker compose up`` to spin up the containers. -If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: +Configuring Dataverse +--------------------- -- dataverse -- postgres -- solr -- smtp -- bootstrap +Now that you are familiar with the basics of running Dataverse in containers, let's move on to configuration. -Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. +Start Fresh ++++++++++++ -Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. +For this configuration exercise, please start fresh by stopping all containers and removing the ``data`` directory. -Security --------- +Change the Site URL ++++++++++++++++++++ -Please be aware that for now, the "dev" persona is used to bootstrap Dataverse, which means that admin APIs are wide open (to allow developers to test them; see :ref:`securing-your-installation` for more on API blocking), the "create user" key is set to a default value, etc. You can inspect the dev person `on GitHub `_ (look for ``--insecure``). +Edit ``compose.yml`` and change ``_CT_DATAVERSE_SITEURL`` to the URL you plan to use for your installation. -We plan to ship a "demo" persona but it is not ready yet. See also :ref:`configbaker-personas`. +(You can read more about this setting at :ref:`dataverse.siteUrl`.) -Common Operations ------------------ +This is an example of setting an environment variable to configure Dataverse. -Starting the Containers -+++++++++++++++++++++++ +Create and Run a Demo Persona ++++++++++++++++++++++++++++++ -First, download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` and place it somewhere you'll remember. 
+Previously we used the "dev" persona to bootstrap Dataverse, but for security reasons, we should create a persona more suited to demos and evaluations. -Then, run ``docker compose up`` in the directory where you put ``compose.yml`` +Edit the ``compose.yml`` file and look for the following section. -Starting the containers for the first time involves a bootstrap process. You should see "have a nice day" output at the end. +.. code-block:: bash -Stopping the Containers -+++++++++++++++++++++++ + bootstrap: + container_name: "bootstrap" + image: gdcc/configbaker:alpha + restart: "no" + command: + - bootstrap.sh + - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo + networks: + - dataverse -You might want to stop the containers if you aren't using them. Hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). +Comment out "dev" and uncomment "demo". -You data is still intact and you can start the containers again with ``docker compose up``. +Uncomment the "volumes" section. -Deleting the Containers -+++++++++++++++++++++++ +Create a directory called "demo" and copy :download:`init.sh <../../../../../modules/container-configbaker/scripts/bootstrap/demo/init.sh>` into it. You are welcome to edit this demo init script, customizing the final message, for example. -If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. +Now run ``docker compose up``. The "bootstrap" container should exit with the message from the init script and Dataverse should be running on http://localhost:8080 as before during the quickstart exercise. -Deleting the Data Directory -+++++++++++++++++++++++++++ +One of the main differences between the "dev" persona and our new "demo" persona is that we are now running the setup-all script without the ``--insecure`` flag. This makes our installation more secure, though it does block "admin" APIs that are useful for configuration. -Data related to the Dataverse containers is placed in a directory called ``data`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. +Set DOI Provider to FAKE +++++++++++++++++++++++++ -Configuration -------------- +For the purposes of a demo, we'll use the "FAKE" DOI provider. (For more on this and related settings, see :ref:`pids-configuration` in the Installation Guide.) Without this step, you won't be able to create or publish datasets. -Configuration is described in greater detail under :doc:`/installation/config` in the Installation Guide, but there are some specifics to running in containers you should know about. +Run the following command. (In this context, "dataverse" is the name of the running container.) -.. _configbaker-personas: +``docker exec -it dataverse curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` -Personas -++++++++ +This is an example of configuring a database setting, which you can read more about at :ref:`database-settings` in the Installation Guide. -When the containers are bootstrapped, the "dev" persona is used. In the future we plan to add a "demo" persona that is more suited to demo and evaluation use cases. 
+Smoke Test +---------- -Database Settings -+++++++++++++++++ +At this point, please try some basic operations within your installation, such as: -Updating database settings is the same as described under :ref:`database-settings` in the Installation Guide. +- logging in as dataverseAdmin +- publishing the "root" collection (dataverse) +- creating a collection +- creating a dataset +- uploading a data file +- publishing the dataset -MPCONFIG Options -++++++++++++++++ +About the Containers +-------------------- -The compose file contains an ``environment`` section with various MicroProfile Config (MPCONFIG) options. You can experiment with this by adding ``DATAVERSE_VERSION: foobar`` to change the (displayed) version of Dataverse to "foobar". +Container List +++++++++++++++ -JVM Options -+++++++++++ +If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: -JVM options are not especially easy to change in the container. The general process is to get a shell on the "dataverse" container, change the settings, and then stop and start the containers. See :ref:`jvm-options` for more. +- dataverse +- postgres +- solr +- smtp +- bootstrap + +Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. + +Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. + +Tags and Versions ++++++++++++++++++ + +The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change if you do a ``docker pull``. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. + +Once Dataverse is running, you can check which version you have through the normal methods: + +- Check the bottom right in a web browser. +- Check http://localhost:8080/api/info/version via API. Troubleshooting --------------- +Hardware and Software Requirements +++++++++++++++++++++++++++++++++++ + +- 8 GB RAM (if not much else is running) +- Mac, Linux, or Windows (experimental) +- Docker + +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). + Bootstrapping Did Not Complete ++++++++++++++++++++++++++++++ @@ -126,6 +160,21 @@ In the compose file, try increasing the timeout in the bootstrap container by ad environment: - TIMEOUT=10m +Wrapping Up +----------- + +Deleting the Containers and Data +++++++++++++++++++++++++++++++++ + +If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. + +You might also want to delete the ``data`` directory, as described above. + +Giving Feedback +--------------- + +Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. 
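When reporting a problem or asking for help, the bootstrap container's log output and the running version are usually the most useful details to include. A short sketch for gathering both, assuming the container names used in the compose file above:

.. code-block:: bash

    # Capture the bootstrap container's output to see how far setup got
    docker logs bootstrap > bootstrap.log

    # Record which Dataverse version is actually running
    curl http://localhost:8080/api/info/version
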
+ Getting Help ------------ diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 3817921f10a..a262f43006a 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -9,6 +9,7 @@ services: restart: on-failure user: payara environment: + _CT_DATAVERSE_SITEURL: "https://demo.example.org" DATAVERSE_DB_HOST: postgres DATAVERSE_DB_PASSWORD: secret DATAVERSE_DB_USER: dataverse @@ -45,6 +46,9 @@ services: command: - bootstrap.sh - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo networks: - dataverse diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh new file mode 100644 index 00000000000..0e9be7ffef5 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +echo "Running base setup-all.sh..." +"${BOOTSTRAP_DIR}"/base/setup-all.sh -p=admin1 | tee /tmp/setup-all.sh.out + +echo "" +echo "Done, your instance has been configured for demo or eval. Have a nice day!" From bdc2c8e980ac9878ef472f874098e4f25431592b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 31 Jan 2024 10:05:04 -0500 Subject: [PATCH 524/546] #9748 avoid issue with existing tools --- .../edu/harvard/iq/dataverse/api/TestApi.java | 26 +++++++++++++++++++ .../iq/dataverse/api/ExternalToolsIT.java | 15 ++++++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 15 +++++++++++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index 87be1f14e05..10510013495 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -71,5 +71,31 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que return wr.getResponse(); } } + + @Path("files/{id}/externalTool/{toolId}") + @GET + public Response getExternalToolForFileById(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied, @PathParam("toolId") String toolId) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + try { + DataFile dataFile = findDataFileOrDie(idSupplied); + List datasetTools = externalToolService.findFileToolsByTypeAndContentType(type, dataFile.getContentType()); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (externalToolService.meetsRequirements(tool, dataFile) && tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + return error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 2c96ce96dea..9a280f475a1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -101,7 +101,7 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")); - long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getTool = UtilIT.getExternalTool(toolId); getTool.prettyPrint(); @@ -115,14 +115,17 @@ public void testFileLevelTool1() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", CoreMatchers.equalTo("Type must be one of these values: [explore, configure, preview, query].")); - Response getExternalToolsForTabularFiles = UtilIT.getExternalToolsForFile(tabularFileId.toString(), "explore", apiToken); + // Getting tool by tool Id to avoid issue where there are existing tools + String toolIdString = toolId.toString(); + Response getExternalToolsForTabularFiles = UtilIT.getExternalToolForFileById(tabularFileId.toString(), "explore", apiToken, toolIdString); getExternalToolsForTabularFiles.prettyPrint(); + getExternalToolsForTabularFiles.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data[0].displayName", CoreMatchers.equalTo("AwesomeTool")) - .body("data[0].scope", CoreMatchers.equalTo("file")) - .body("data[0].contentType", CoreMatchers.equalTo("text/tab-separated-values")) - .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); + .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")) + .body("data.scope", CoreMatchers.equalTo("file")) + .body("data.contentType", CoreMatchers.equalTo("text/tab-separated-values")) + .body("data.toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); Response getExternalToolsForJuptyerNotebooks = UtilIT.getExternalToolsForFile(jupyterNotebookFileId.toString(), "explore", apiToken); getExternalToolsForJuptyerNotebooks.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 6af3f8a0a09..ec41248a65f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2354,6 +2354,21 @@ static Response getExternalToolsForFile(String idOrPersistentIdOfFile, String ty } return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTools?type=" + type + optionalQueryParam); } + + static Response getExternalToolForFileById(String idOrPersistentIdOfFile, String type, String apiToken, String toolId) { + String idInPath = idOrPersistentIdOfFile; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
+ if (!NumberUtils.isCreatable(idOrPersistentIdOfFile)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfFile; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTool/" + toolId + "?type=" + type + optionalQueryParam); + } static Response submitFeedback(JsonObjectBuilder job) { return given() From 7d537aa394c447562820cf0343fd6ec2d8a760ca Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 31 Jan 2024 17:45:01 -0500 Subject: [PATCH 525/546] simplified/reorganized the new dev. checklist for making a core field multiple #9634 --- .../source/admin/metadatacustomization.rst | 19 +++++++++++++- doc/sphinx-guides/source/developers/index.rst | 1 - .../source/developers/metadatablocksdev.rst | 26 ------------------- 3 files changed, 18 insertions(+), 28 deletions(-) delete mode 100644 doc/sphinx-guides/source/developers/metadatablocksdev.rst diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 36956567a7d..f97b222b51f 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -651,7 +651,24 @@ The thinking is that the tips can become issues and the issues can eventually be Development Tasks Specific to Changing Fields in Core Metadata Blocks --------------------------------------------------------------------- -When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation and Social Science blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields (a real life example is making a single value-only field support multiple values), code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. An example of a checklist of such tasks is provided in the Development Guide, please see the :doc:`/developers/metadatablocksdev` section. +When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation, Social Science and Geospatial blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields, code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. + +Making a Field Multi-Valued +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a recent real life example, a few fields from the Citation and Social Science block were changed to support multiple values, in order to accommodate specific needs of some community member institutions. A PR for one of these fields, ``alternativeTitle`` from the Citation block is linked below. Each time a number of code changes, plus some changes in the sample metadata files in the Dataverse code tree had to be made. The checklist below is to help another developer in the event that a similar change becomes necessary in the future. 
Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. + +- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block. +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Modify the DDI export utility (``DdiExportUtil.java``). +- Modify the OpenAire export utility (``OpenAireExportUtil.java``). +- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). +- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. +- Make sure all the automated Unit and Integration tests are passing. +- Write a short release note to announce the change in the upcoming release. +- Make a Pull Request. + Footnotes --------- diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 25007baf589..25fea138736 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -31,7 +31,6 @@ Developer Guide making-releases making-library-releases metadataexport - metadatablocksdev tools unf/index make-data-count diff --git a/doc/sphinx-guides/source/developers/metadatablocksdev.rst b/doc/sphinx-guides/source/developers/metadatablocksdev.rst deleted file mode 100644 index 17093471467..00000000000 --- a/doc/sphinx-guides/source/developers/metadatablocksdev.rst +++ /dev/null @@ -1,26 +0,0 @@ -=========================== -Metadata Blocks Development -=========================== - -.. contents:: |toctitle| - :local: - -Introduction ------------- - -The idea behind Metadata Blocks in Dataverse is to have everything about the supported metadata fields configurable and customizable. Ideally, this should be accomplished by simply re-importing the updated tsv for the block via the API. In practice, when it comes to the core blocks that are distributed with Dataverse - such as the Citation and Social Science blocks - unfortunately, many dependencies exist in various parts of Dataverse, primarily import and export subsystems, on many specific fields being configured a certain way. This means that code changes may be required whenever a field from one of these core blocks is modified. - -Making a Field Multiple ------------------------ - -Back in 2023, in order to accommodate specific needs of some community member institutions a few fields from Citation and Social Science were changed to support multiple values. 
(For example, the ``alternativeTitle`` field from the Citation block.) A number of code changes had to be made to accommodate this, plus a number of changes in the sample metadata files that are maintained in the Dataverse code tree. The checklist below is to help another developer should a similar change become necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. - -- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block (obviously). -- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). -- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) -- Modify the DDI export utility (``DdiExportUtil.java``). -- Modify the OpenAire export utility (``OpenAireExportUtil.java``). -- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). -- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated Unit and Integration tests are passing. -- Write a short release note to announce the change in the upcoming release. From ad12c7f2ddaf4f6fb1ec5023845d98092df0da47 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 12:28:06 -0500 Subject: [PATCH 526/546] Apply suggestions from code review --- .../source/admin/metadatacustomization.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index f97b222b51f..841dfd8b3cd 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -658,16 +658,16 @@ Making a Field Multi-Valued As a recent real life example, a few fields from the Citation and Social Science block were changed to support multiple values, in order to accommodate specific needs of some community member institutions. A PR for one of these fields, ``alternativeTitle`` from the Citation block is linked below. Each time a number of code changes, plus some changes in the sample metadata files in the Dataverse code tree had to be made. The checklist below is to help another developer in the event that a similar change becomes necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. 
It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. -- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block. -- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). -- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Change the value from ``FALSE`` to ``TRUE`` in the ``allowmultiples`` column of the .tsv file for the block. +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/x.x.x/schema.xml``). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (You may be able to use the change in the PR above as a model.) - Modify the DDI export utility (``DdiExportUtil.java``). - Modify the OpenAire export utility (``OpenAireExportUtil.java``). - Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). - Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated Unit and Integration tests are passing. +- Make sure all the automated unit and integration tests are passing. - Write a short release note to announce the change in the upcoming release. -- Make a Pull Request. +- Make a pull request. Footnotes From e064313c4c11fbec2bf875d0f8dbe98b99013fca Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 12:31:01 -0500 Subject: [PATCH 527/546] add refs to dev guide #9634 --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 841dfd8b3cd..5bd28bfa103 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -665,8 +665,8 @@ As a recent real life example, a few fields from the Citation and Social Science - Modify the OpenAire export utility (``OpenAireExportUtil.java``). - Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). 
- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated unit and integration tests are passing. -- Write a short release note to announce the change in the upcoming release. +- Make sure all the automated unit and integration tests are passing. See :doc:`/developers/testing` in the Developer Guide. +- Write a short release note to announce the change in the upcoming release. See :ref:`writing-release-note-snippets` in the Developer Guide. - Make a pull request. From 89739bc39542930546c807c2236033b7da790688 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 16:37:58 -0500 Subject: [PATCH 528/546] use --insecure and secure later #10238 Using --insecure at first and then doing securing APIs, etc later (like non --insecure does) seems like the best option for now. It allows us to simplify the tutorial and set up an unblock key for later use. --- .../source/container/running/demo.rst | 96 +++++++++++++------ .../scripts/bootstrap/demo/init.sh | 30 +++++- 2 files changed, 94 insertions(+), 32 deletions(-) diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 4e2a9db3f48..24027e677a1 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -36,27 +36,18 @@ Again, data related to your Dataverse installation such as the database is store You may reach a point during your demo or evaluation that you'd like to start over with a fresh database. Simply make sure the containers are not running and then remove the ``data`` directory. Now, as before, you can run ``docker compose up`` to spin up the containers. -Configuring Dataverse +Setting Up for a Demo --------------------- -Now that you are familiar with the basics of running Dataverse in containers, let's move on to configuration. +Now that you are familiar with the basics of running Dataverse in containers, let's move on to a better setup for a demo or evaluation. -Start Fresh -+++++++++++ - -For this configuration exercise, please start fresh by stopping all containers and removing the ``data`` directory. - -Change the Site URL -+++++++++++++++++++ - -Edit ``compose.yml`` and change ``_CT_DATAVERSE_SITEURL`` to the URL you plan to use for your installation. - -(You can read more about this setting at :ref:`dataverse.siteUrl`.) +Starting Fresh +++++++++++++++ -This is an example of setting an environment variable to configure Dataverse. +For this exercise, please start fresh by stopping all containers and removing the ``data`` directory. -Create and Run a Demo Persona -+++++++++++++++++++++++++++++ +Creating and Running a Demo Persona ++++++++++++++++++++++++++++++++++++ Previously we used the "dev" persona to bootstrap Dataverse, but for security reasons, we should create a persona more suited to demos and evaluations. @@ -83,36 +74,81 @@ Uncomment the "volumes" section. Create a directory called "demo" and copy :download:`init.sh <../../../../../modules/container-configbaker/scripts/bootstrap/demo/init.sh>` into it. You are welcome to edit this demo init script, customizing the final message, for example. +Note that the init script contains a key for using the admin API once it is blocked. You should change it in the script from "unblockme" to something only you know. 
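One way to do that, sketched here on the assumption that the copied script lives at ``demo/init.sh`` as described above, is a quick in-place substitution (GNU ``sed``; on macOS, use ``sed -i ''`` instead):

.. code-block:: bash

    # Replace the default unblock key with one only you know
    # ("s3cret-demo-key" is just a placeholder value)
    sed -i 's/unblockme/s3cret-demo-key/' demo/init.sh
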
+ Now run ``docker compose up``. The "bootstrap" container should exit with the message from the init script and Dataverse should be running on http://localhost:8080 as before during the quickstart exercise. One of the main differences between the "dev" persona and our new "demo" persona is that we are now running the setup-all script without the ``--insecure`` flag. This makes our installation more secure, though it does block "admin" APIs that are useful for configuration. -Set DOI Provider to FAKE -++++++++++++++++++++++++ +Smoke Testing +------------- + +At this point, please try the following basic operations within your installation: + +- logging in as dataverseAdmin (password "admin1") +- publishing the "root" collection (dataverse) +- creating a collection +- creating a dataset +- uploading a data file +- publishing the dataset + +If anything isn't working, please see the sections below on troubleshooting, giving feedback, and getting help. + +Further Configuration +--------------------- + +Now that we've verified through a smoke test that basic operations are working, let's configure our installation of Dataverse. + +Please refer to the :doc:`/installation/config` section of the Installation Guide for various configuration options. -For the purposes of a demo, we'll use the "FAKE" DOI provider. (For more on this and related settings, see :ref:`pids-configuration` in the Installation Guide.) Without this step, you won't be able to create or publish datasets. +Below we'll explain some specifics for configuration in containers. -Run the following command. (In this context, "dataverse" is the name of the running container.) +JVM Options/MicroProfile Config ++++++++++++++++++++++++++++++++ -``docker exec -it dataverse curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` +:ref:`jvm-options` can be configured under ``JVM_ARGS`` in the ``compose.yml`` file. Here's an example: + +.. code-block:: bash -This is an example of configuring a database setting, which you can read more about at :ref:`database-settings` in the Installation Guide. + environment: + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Smoke Test +Some JVM options can be configured as environment variables. For example, you can configure the database host like this: + +.. code-block:: bash + + environment: + DATAVERSE_DB_HOST: postgres + +We are in the process of making more JVM options configurable as environment variables. Look for the term "MicroProfile Config" in under :doc:`/installation/config` in the Installation Guide to know if you can use them this way. + +Please note that for a few environment variables (the ones that start with ``%ct`` in :download:`microprofile-config.properties <../../../../../src/main/resources/META-INF/microprofile-config.properties>`), you have to prepend ``_CT_`` to make, for example, ``_CT_DATAVERSE_SITEURL``. We are working on a fix for this in https://github.com/IQSS/dataverse/issues/10285. + +There is a final way to configure JVM options that we plan to deprecate once all JVM options have been converted to MicroProfile Config. Look for "magic trick" under "tunables" at :doc:`../app-image` for more information. + +Database Settings ++++++++++++++++++ + +Generally, you should be able to look at the list of :ref:`database-settings` and configure them but the "demo" persona above secured your installation to the point that you'll need an "unblock key" to access the "admin" API and change database settings. 
+ +In the example below of configuring :ref:`:FooterCopyright` we use the default unblock key of "unblockme" but you should use the key you set above. + +``curl -X PUT -d ", My Org" "http://localhost:8080/api/admin/settings/:FooterCopyright?unblock-key=unblockme"`` + +One you make this change it should be visible in the copyright in the bottom left of every page. + +Next Steps ---------- -At this point, please try some basic operations within your installation, such as: +From here, you are encouraged to continue poking around, configuring, and testing. You probably spend a lot of time reading the :doc:`/installation/config` section of the Installation Guide. -- logging in as dataverseAdmin -- publishing the "root" collection (dataverse) -- creating a collection -- creating a dataset -- uploading a data file -- publishing the dataset +Please consider giving feedback using the methods described below. Good luck with your demo! About the Containers -------------------- +Now that you've gone through the tutorial, you might be interested in the various containers you've spun up and what they do. + Container List ++++++++++++++ diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh index 0e9be7ffef5..e8d1d07dd2d 100644 --- a/modules/container-configbaker/scripts/bootstrap/demo/init.sh +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -2,12 +2,38 @@ set -euo pipefail -# Set some defaults as documented +# Set some defaults DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} export DATAVERSE_URL +BLOCKED_API_KEY=${BLOCKED_API_KEY:-"unblockme"} +export BLOCKED_API_KEY + +# --insecure is used so we can configure a few things but +# later in this script we'll apply the changes as if we had +# run the script without --insecure. echo "Running base setup-all.sh..." -"${BOOTSTRAP_DIR}"/base/setup-all.sh -p=admin1 | tee /tmp/setup-all.sh.out +"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out + +echo "" +echo "Setting DOI provider to \"FAKE\"..." +curl -sS -X PUT -d FAKE "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" + +echo "" +echo "Revoke the key that allows for creation of builtin users..." +curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + +echo "" +echo "Set key for accessing blocked API endpoints..." +curl -sS -X PUT -d "$BLOCKED_API_KEY" "${DATAVERSE_URL}/api/admin/settings/:BlockedApiKey" + +echo "" +echo "Set policy to only allow access to admin APIs with with a key..." +curl -sS -X PUT -d unblock-key "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" + +echo "" +echo "Block admin and other sensitive API endpoints..." +curl -sS -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" echo "" echo "Done, your instance has been configured for demo or eval. Have a nice day!" From c8f71f16d41c83586bd4572fd2e4bcf9f8b3962b Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:15:17 -0500 Subject: [PATCH 529/546] Update metadatacustomization.rst The /tree seems to be just a reference for the GitHub URL but the project doesn't have a "tree" directory so probably would be better or less confusing to reference the root of the project. Also the property files are in a different location than the one specified on the Documentation. 
--- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 5bd28bfa103..c9cb3c47f85 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,8 +37,8 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/tree/develop/scripts/api/data/metadatablocks -`__ and the corresponding ResourceBundle property files `/tree/develop/src/main/java `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets +The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks +`__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. About the metadata block TSV From 2978080e5299d91d340ff926ec2a3a33a81b40df Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 2 Feb 2024 16:50:20 -0500 Subject: [PATCH 530/546] Update metadatacustomization.rst --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index c9cb3c47f85..78eadd9b2ce 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,8 +37,8 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks -`__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets +The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks +`__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. 
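Both locations can be browsed directly in a checkout of the Dataverse source tree; a small sketch, using the citation block as the example:

.. code-block:: bash

    # Shipped metadata block TSVs and their ResourceBundle property files
    ls scripts/api/data/metadatablocks/
    ls src/main/java/propertyFiles/

    # For example, inspect how a citation field is defined
    grep -i "alternativeTitle" scripts/api/data/metadatablocks/citation.tsv
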
About the metadata block TSV From 24daf553ecdbc7811737da58d6a41b6294a98434 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:53:24 -0500 Subject: [PATCH 531/546] Update metadatacustomization.rst As @qqmyers pointed these are not on /src --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index c9cb3c47f85..4920859d716 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,7 +37,7 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks +The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks `__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. From 7c248239c260e56c2c7e162b0ddfafda1af7d9f6 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 2 Feb 2024 19:12:59 -0500 Subject: [PATCH 532/546] Fix line break --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 7d6e0c4c5c1..f518c7eb802 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -40,7 +40,6 @@ good organizational practice to define only one in each file. The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks `__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets - document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. About the metadata block TSV From 59f1560daa77404c602029e2112546b00f9f19f2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 2 Feb 2024 19:16:02 -0500 Subject: [PATCH 533/546] Fix incorrect line break that cause build fail --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index f518c7eb802..78eadd9b2ce 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -38,7 +38,6 @@ possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. 
The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks - `__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. From 77951683a2f495e04098125a81945dc076d80b4b Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:33:46 -0500 Subject: [PATCH 534/546] added tabs --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index a20ab864d2a..05263498977 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." 
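Each tool in this TSV is enabled by registering its manifest with the external tools admin API. A rough sketch, assuming a local installation at localhost:8080 and a manifest saved as turbocurator.json (a hypothetical file name):

    # register an external tool by POSTing its JSON manifest to the admin API
    curl -X POST -H "Content-type: application/json" \
         --upload-file turbocurator.json \
         http://localhost:8080/api/admin/externalTools

The list of currently registered tools can then be checked with a GET on the same /api/admin/externalTools endpoint.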
From 905c8cf906857feb2e7231f31c1a2e224b33d26b Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:36:27 -0500 Subject: [PATCH 535/546] added correct tabs --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 05263498977..10f9a6a6062 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." 
From 5760c259ae493ce3670eefcd850480e5106133ef Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 5 Feb 2024 15:11:55 -0500 Subject: [PATCH 536/546] fix formatting #10279 --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 10f9a6a6062..c22392a7c5e 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI's ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation. 
From a92560059ecd18a081a063a08f4c5a998fb1e3d4 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 5 Feb 2024 19:33:33 -0500 Subject: [PATCH 537/546] Fix to provide latest version metadata --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ea74368d110..e3505cbbb33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -778,7 +778,7 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @ @Path("{id}/metadata") @Produces("application/ld+json, application/json-ld") public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return getVersionJsonLDMetadata(crc, id, DS_VERSION_DRAFT, uriInfo, headers); + return getVersionJsonLDMetadata(crc, id, DS_VERSION_LATEST, uriInfo, headers); } @PUT diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 125753296a2..cd292a40a1e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1202,7 +1202,7 @@ public void testGeospatialSearch() { .add("value", "42.33661") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "southLongitude") + .add("typeName", "southLongitud e") ) .add("eastLongitude", Json.createObjectBuilder() From ae9b74fd4592103e1c8135655d312bb7ef0c24d7 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Feb 2024 09:27:09 -0500 Subject: [PATCH 538/546] #10229 fix popup list --- src/main/java/edu/harvard/iq/dataverse/DataversePage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 943a74327d5..3dbc22902b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -362,7 +362,7 @@ public void initFeaturedDataverses() { List featuredSource = new ArrayList<>(); List featuredTarget = new ArrayList<>(); featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId())); - featuredSource.addAll(linkingService.findLinkingDataverses(dataverse.getId())); + featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId())); List featuredList = featuredDataverseService.findByDataverseId(dataverse.getId()); for (DataverseFeaturedDataverse dfd : featuredList) { Dataverse fd = dfd.getFeaturedDataverse(); From 4309ab06308f1be2333dcf40bc0bda3c11022437 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Feb 2024 09:34:01 -0500 Subject: [PATCH 539/546] #10229 add to error message --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 157f2ecaf54..f1c8381816c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -875,7 +875,7 @@ dataverse.option.deleteDataverse=Delete Dataverse dataverse.publish.btn=Publish 
dataverse.publish.header=Publish Dataverse dataverse.nopublished=No Published Dataverses -dataverse.nopublished.tip=In order to use this feature you must have at least one published dataverse. +dataverse.nopublished.tip=In order to use this feature you must have at least one published or linked dataverse. dataverse.contact=Email Dataverse Contact dataverse.link=Link Dataverse dataverse.link.btn.tip=Link to Your Dataverse From 2f7ce01fd67539a9213d87884dc229e689a055da Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:38:44 -0500 Subject: [PATCH 540/546] Add to DatasetsIT testSemanticMetadataAPIs test cases for published and draft --- .../harvard/iq/dataverse/api/DatasetsIT.java | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6e6855306e4..e1c4b901116 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3013,6 +3013,46 @@ public void testSemanticMetadataAPIs() { response = UtilIT.updateDatasetJsonLDMetadata(datasetId, apiToken, badTerms, false); response.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + //We publish the dataset and dataverse + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + //We check the version is published + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + String publishedVersion = jsonLDObject.getString("http://schema.org/version"); + assertNotEquals("DRAFT", publishedVersion); + + // Upload a file so a draft version is created + String pathToFile = "src/main/webapp/resources/images/cc0.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadResponse.prettyPrint(); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + int fileID = uploadResponse.jsonPath().getInt("data.files[0].dataFile.id"); + + //We check the authenticated user gets DRAFT + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + assertEquals("DRAFT", jsonLDObject.getString("http://schema.org/version")); + + // Create user with no permission and check they get published version + String apiTokenNoPerms = UtilIT.createRandomUserGetToken(); + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiTokenNoPerms); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + assertNotEquals("DRAFT", jsonLDObject.getString("http://schema.org/version")); + + // Delete the file + Response deleteFileResponse = UtilIT.deleteFileInDataset(fileID, apiToken); + deleteFileResponse.prettyPrint(); + deleteFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + // Delete the terms of use response = UtilIT.deleteDatasetJsonLDMetadata(datasetId, apiToken, "{\"https://dataverse.org/schema/core#termsOfUse\": \"New terms\"}"); @@ -3026,15 +3066,27 @@ 
public void testSemanticMetadataAPIs() { jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); assertTrue(!jsonLDObject.containsKey("https://dataverse.org/schema/core#termsOfUse")); - // Cleanup - delete dataset, dataverse, user... - Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); - deleteDatasetResponse.prettyPrint(); - assertEquals(200, deleteDatasetResponse.getStatusCode()); + //Delete the DRAFT dataset + Response deleteDraftResponse = UtilIT.deleteDatasetVersionViaNativeApi(datasetId, DS_VERSION_DRAFT, apiToken); + deleteDraftResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + + //We set the user as superuser so we can delete the published dataset + Response superUserResponse = UtilIT.makeSuperUser(username); + superUserResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + + //Delete the published dataset + Response deletePublishedResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); + deletePublishedResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + //Delete the dataverse Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteDataverseResponse.prettyPrint(); assertEquals(200, deleteDataverseResponse.getStatusCode()); + //Delete the user Response deleteUserResponse = UtilIT.deleteUser(username); deleteUserResponse.prettyPrint(); assertEquals(200, deleteUserResponse.getStatusCode()); From 9568c20359234bbe87b17656c91926ab11329a57 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:53:24 -0500 Subject: [PATCH 541/546] Add release notes --- doc/release-notes/10297-metadata-api-fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10297-metadata-api-fix.md diff --git a/doc/release-notes/10297-metadata-api-fix.md b/doc/release-notes/10297-metadata-api-fix.md new file mode 100644 index 00000000000..11ee086af04 --- /dev/null +++ b/doc/release-notes/10297-metadata-api-fix.md @@ -0,0 +1 @@ +The API endpoint `api/datasets/{id}/metadata` has been changed to default to the latest version of the dataset that the user has access. 
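The change described in this release note means the response from the JSON-LD metadata endpoint now depends on what the caller is allowed to see. A sketch of the two cases, assuming a local installation at localhost:8080, with $API_TOKEN, $OTHER_TOKEN, and $ID used as placeholders for real API tokens and a dataset id:

    # a user whose token can see the draft: receives the DRAFT version's JSON-LD metadata
    curl -H "X-Dataverse-key: $API_TOKEN" "http://localhost:8080/api/datasets/$ID/metadata"

    # a user whose token cannot see the draft: gets the latest published version instead
    curl -H "X-Dataverse-key: $OTHER_TOKEN" "http://localhost:8080/api/datasets/$ID/metadata"

This mirrors what the new DatasetsIT assertions check: a user with access to the draft sees "DRAFT" in http://schema.org/version, while a user without that permission sees the published version number.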
From 2f167cf57def265d719f52a7211ed6648b7e3df8 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:56:03 -0500 Subject: [PATCH 542/546] Restore SearchIT --- src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index cd292a40a1e..125753296a2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1202,7 +1202,7 @@ public void testGeospatialSearch() { .add("value", "42.33661") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "southLongitud e") + .add("typeName", "southLongitude") ) .add("eastLongitude", Json.createObjectBuilder() From df4f49a1650070427a710046be32b7c5f6ad5312 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 6 Feb 2024 14:43:38 -0500 Subject: [PATCH 543/546] add release note #10238 --- doc/release-notes/10238-container-demo.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10238-container-demo.md diff --git a/doc/release-notes/10238-container-demo.md b/doc/release-notes/10238-container-demo.md new file mode 100644 index 00000000000..edc4db4b650 --- /dev/null +++ b/doc/release-notes/10238-container-demo.md @@ -0,0 +1 @@ +The Container Guide now containers a tutorial for running Dataverse in containers for demo or evaluation purposes: https://guides.dataverse.org/en/6.2/container From ce4b1e0418b31a9a4db9fa7ab1926f17459a046c Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 14:50:35 -0500 Subject: [PATCH 544/546] Change the workflow section including feedback from @sekmiller --- doc/sphinx-guides/source/qa/overview.md | 27 ++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 64796357831..a5b613f6516 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -11,11 +11,28 @@ This guide describes the testing process used by QA at IQSS and provides a refer ## Workflow -The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as [issues](https://github.com/IQSS/dataverse/issues). These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. - -Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. - -Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. 
Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. +Here is a brief description of our workflow: + +### Issue Submission and Prioritization: +- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues)sues. +- These Issues are prioritized and added to a two-week-long sprint that can be tracked on the {ref}`kanban-board`. + +### Development Process: +- Developers will work on a solution on a separate branch +- Once a developer completes their work, they submit a [Pull Request](https://github.com/IQSS/dataverse/pulls) (PR). +- The PR is reviewed by a developer from the team. +- During the review, the reviewer may suggest coding or documentation changes to the original developer. + +### Quality Assurance (QA) Testing: +- The QA tester performs a smoke test of core functionality and regression testing. +- Documentation is used to understand the feature and validate any assertions made. +- If no documentation is provided in the PR, the tester may refer to the original bug report to determine the desired outcome of the changes. +- Once the branch is assumed to be safe, it is merged into the develop branch. + +### Final Steps: +- The PR and the Issue are closed and assigned the “merged†status. +- It is good practice to delete the branch if it is local. +- The content from the PR becomes part of the codebase for {doc}`future releases `. The complete suggested workflow can be found at {doc}`qa-workflow`. From de3bad6e6ec000f182c9a50e019f155cb0c20fb9 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 14:53:05 -0500 Subject: [PATCH 545/546] Typo correction --- doc/sphinx-guides/source/qa/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index a5b613f6516..60e6a28ee9a 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -14,7 +14,7 @@ This guide describes the testing process used by QA at IQSS and provides a refer Here is a brief description of our workflow: ### Issue Submission and Prioritization: -- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues)sues. +- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues). - These Issues are prioritized and added to a two-week-long sprint that can be tracked on the {ref}`kanban-board`. ### Development Process: From bec394519826529c02adedfdd601f04b45f859c2 Mon Sep 17 00:00:00 2001 From: landreev Date: Wed, 7 Feb 2024 11:50:52 -0500 Subject: [PATCH 546/546] 8524 adding mechanism for storing tab. files with variable headers (#10282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * "stored with header" flag #8524 * more changes for the streaming and redirect code. #8524 * disabling dynamically-generated varheader in the remaining storage drivers. 
#8524 * Ingest plugins (work in progress) #8524 * R ingest plugin (#8524) * still some unaddressed @todo:s, but the branch should build and the unit tests should be passing. # 8524 * work-in-progress, on the subsetting code in the download instance writer. #8524 * more work-in-progress changes. removing all the unused code from TabularSubsetGenerator, for clarity etc. #8524 * more bits and pieces #8524 * 2 more ingest plugins. #8542 * Integration tests. #8524 * typo #8524 * documenting the new setting. #8524 * a release note for the pr. also, added the "storage quotas enabled" to the list of settings documented in the config guide while I was at it. #8524 * removed all the unused code from this class (lots of it) for clarity, etc. git history can be consulted if anyone is curious about what we used to do here. #8524 * removing @todo: that's no longer relevant #8524 * (cosmetic) defined the control constants used in the integration test. #8524 --- ...4-storing-tabular-files-with-varheaders.md | 6 + .../source/installation/config.rst | 22 + .../edu/harvard/iq/dataverse/DataTable.java | 18 + .../dataverse/api/DownloadInstanceWriter.java | 78 +- .../harvard/iq/dataverse/api/TestIngest.java | 2 +- .../iq/dataverse/dataaccess/FileAccessIO.java | 3 +- .../dataaccess/GlobusOverlayAccessIO.java | 8 +- .../dataaccess/RemoteOverlayAccessIO.java | 8 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 3 +- .../dataverse/dataaccess/SwiftAccessIO.java | 3 +- .../dataaccess/TabularSubsetGenerator.java | 1150 +---------------- .../dataaccess/TabularSubsetInputStream.java | 114 -- .../export/DDIExportServiceBean.java | 11 + .../dataverse/ingest/IngestServiceBean.java | 64 +- .../tabulardata/TabularDataFileReader.java | 26 +- .../impl/plugins/csv/CSVFileReader.java | 24 +- .../impl/plugins/dta/DTAFileReader.java | 11 +- .../impl/plugins/dta/NewDTAFileReader.java | 19 +- .../impl/plugins/por/PORFileReader.java | 13 +- .../impl/plugins/rdata/RDATAFileReader.java | 4 +- .../impl/plugins/rdata/RTabFileParser.java | 28 +- .../impl/plugins/sav/SAVFileReader.java | 24 +- .../impl/plugins/xlsx/XLSXFileReader.java | 11 +- .../settings/SettingsServiceBean.java | 7 +- .../iq/dataverse/util/SystemConfig.java | 8 + ...24-store-tabular-files-with-varheaders.sql | 1 + .../edu/harvard/iq/dataverse/api/FilesIT.java | 128 ++ .../dataverse/ingest/IngestFrequencyTest.java | 2 +- .../impl/plugins/csv/CSVFileReaderTest.java | 24 +- .../impl/plugins/dta/DTAFileReaderTest.java | 2 +- .../plugins/dta/NewDTAFileReaderTest.java | 14 +- 31 files changed, 501 insertions(+), 1335 deletions(-) create mode 100644 doc/release-notes/8524-storing-tabular-files-with-varheaders.md delete mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java create mode 100644 src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql diff --git a/doc/release-notes/8524-storing-tabular-files-with-varheaders.md b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md new file mode 100644 index 00000000000..f7034c846f6 --- /dev/null +++ b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md @@ -0,0 +1,6 @@ +Tabular Data Ingest can now save the generated archival files with the list of variable names added as the first tab-delimited line. As the most significant effect of this feature, +Access API will be able to take advantage of Direct Download for tab. files saved with these headers on S3 - since they no longer have to be generated and added to the streamed content on the fly. 
+ +This behavior is controlled by the new setting `:StoreIngestedTabularFilesWithVarHeaders`. It is false by default, preserving the legacy behavior. When enabled, Dataverse will be able to handle both the newly ingested files, and any already-existing legacy files stored without these headers transparently to the user. E.g. the access API will continue delivering tab-delimited files **with** this header line, whether it needs to add it dynamically for the legacy files, or reading complete files directly from storage for the ones stored with it. + +An API for converting existing legacy tabular files will be added separately. [this line will need to be changed if we have time to add said API before 6.2 is released]. \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a7d7905ca4a..c233e594fa7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4151,3 +4151,25 @@ A true/false (default) option determining whether the dataset datafile table dis .. _supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html + +.. _:UseStorageQuotas: + +:UseStorageQuotas ++++++++++++++++++ + +Enables storage use quotas in collections. See the :doc:`/api/native-api` for details. + + +.. _:StoreIngestedTabularFilesWithVarHeaders: + +:StoreIngestedTabularFilesWithVarHeaders +++++++++++++++++++++++++++++++++++++++++ + +With this setting enabled, tabular files produced during Ingest will +be stored with the list of variable names added as the first +tab-delimited line. As the most significant effect of this feature, +Access API will be able to take advantage of Direct Download for +tab. files saved with these headers on S3 - since they no longer have +to be generated and added to the streamed file on the fly. + +The setting is ``false`` by default, preserving the legacy behavior. diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java index a17d8c65138..95f3aed0f40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java @@ -112,6 +112,16 @@ public DataTable() { @Column( nullable = true ) private String originalFileName; + + /** + * The physical tab-delimited file is in storage with the list of variable + * names saved as the 1st line. This means that we do not need to generate + * this line on the fly. (Also means that direct download mechanism can be + * used for this file!) 
+ */ + @Column(nullable = false) + private boolean storedWithVariableHeader = false; + /* * Getter and Setter methods: */ @@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) { this.originalFileName = originalFileName; } + public boolean isStoredWithVariableHeader() { + return storedWithVariableHeader; + } + + public void setStoredWithVariableHeader(boolean storedWithVariableHeader) { + this.storedWithVariableHeader = storedWithVariableHeader; + } + /* * Custom overrides for hashCode(), equals() and toString() methods: */ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index bcb8799ec9e..89b22b76a7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -22,7 +22,6 @@ import jakarta.ws.rs.ext.Provider; import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.engine.command.Command; @@ -104,8 +103,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] String auxiliaryTag = null; String auxiliaryType = null; String auxiliaryFileName = null; + // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a @@ -159,7 +160,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } else if (dataFile.isTabularData()) { - // Many separate special cases here. + // Many separate special cases here. if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { @@ -180,12 +181,26 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - } else if (!di.getConversionParam().equals("noVarHeader")) { - // This is a subset request - can't do. + } else if (di.getConversionParam().equals("noVarHeader")) { + // This will work just fine, if the tab. file is + // stored without the var. header. Throw "unavailable" + // exception otherwise. + // @todo: should we actually drop support for this "noVarHeader" flag? + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + throw new ServiceUnavailableException(); + } + // ... defaults to redirectSupported = true + } else { + // This must be a subset request then - can't do. + redirectSupported = false; + } + } else { + // "straight" download of the full tab-delimited file. + // can redirect, but only if stored with the variable + // header already added: + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { redirectSupported = false; } - } else { - redirectSupported = false; } } } @@ -247,11 +262,16 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); logger.fine("Issuing redirect to the file location."); + // Yes, this throws an exception. It's not an exception + // as in, "bummer, something went wrong". This is how a + // redirect is produced here! 
throw new RedirectionException(response); } throw new ServiceUnavailableException(); } + // Past this point, this is a locally served/streamed download + if (di.getConversionParam() != null) { // Image Thumbnail and Tabular data conversion: // NOTE: only supported on local files, as of 4.0.2! @@ -285,9 +305,14 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // request any tabular-specific services. if (di.getConversionParam().equals("noVarHeader")) { - logger.fine("tabular data with no var header requested"); - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular data with no var header requested"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } else { + logger.fine("can't serve request for tabular data without varheader, since stored with it"); + throw new ServiceUnavailableException(); + } } else if (di.getConversionParam().equals("format")) { // Conversions, and downloads of "stored originals" are // now supported on all DataFiles for which StorageIO @@ -329,11 +354,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) { logger.fine("adding variable id " + variable.getId() + " to the list."); variablePositionIndex.add(variable.getFileOrder()); - if (subsetVariableHeader == null) { - subsetVariableHeader = variable.getName(); - } else { - subsetVariableHeader = subsetVariableHeader.concat("\t"); - subsetVariableHeader = subsetVariableHeader.concat(variable.getName()); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + subsetVariableHeader = subsetVariableHeader == null + ? variable.getName() + : subsetVariableHeader.concat("\t" + variable.getName()); } } else { logger.warning("variable does not belong to this data file."); @@ -346,7 +370,17 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp"); TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator(); - tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t"); + + long numberOfLines = dataFile.getDataTable().getCaseQuantity(); + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + numberOfLines++; + } + + tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), + tempSubsetFile.getAbsolutePath(), + variablePositionIndex, + numberOfLines, + "\t"); if (tempSubsetFile.exists()) { FileInputStream subsetStream = new FileInputStream(tempSubsetFile); @@ -354,8 +388,11 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize); logger.fine("successfully created subset output stream."); - subsetVariableHeader = subsetVariableHeader.concat("\n"); - subsetStreamIO.setVarHeader(subsetVariableHeader); + + if (subsetVariableHeader != null) { + subsetVariableHeader = subsetVariableHeader.concat("\n"); + subsetStreamIO.setVarHeader(subsetVariableHeader); + } String tabularFileName = storageIO.getFileName(); @@ -380,8 +417,13 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } else { logger.fine("empty list of extra arguments."); } + // end of tab. 
data subset case + } else if (dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular file stored with the var header included, no need to generate it on the fly"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); } - } + } // end of tab. data file case if (storageIO == null) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java index 05ba150df8e..add43ea2091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java @@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); return output; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index f2a1312a150..26637ec5742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -120,7 +120,8 @@ public void open (DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 7a6809cb2ff..733daaf1328 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -450,8 +450,12 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(retrieveSizeFromMedia()); } // Only applies for the S3 Connector case (where we could have run an ingest) - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 1616bfabf96..bca70259cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -124,8 +124,12 @@ public void open(DataAccessOption... 
options) throws IOException { logger.fine("Setting size"); this.setSize(retrieveSizeFromMedia()); } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8afc365417e..c2143bd4789 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -225,7 +225,8 @@ public void open(DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 105a60ab418..717f46ffd60 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -142,7 +142,8 @@ public void open(DataAccessOption... 
options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 782f7f3a52d..c369010c8cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -60,305 +60,26 @@ public class TabularSubsetGenerator implements SubsetGenerator { - private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); + private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); - private static int COLUMN_TYPE_STRING = 1; - private static int COLUMN_TYPE_LONG = 2; - private static int COLUMN_TYPE_DOUBLE = 3; - private static int COLUMN_TYPE_FLOAT = 4; - - private static int MAX_COLUMN_BUFFER = 8192; - - private FileChannel fileChannel = null; - - private int varcount; - private int casecount; - private int subsetcount; - - private byte[][] columnEntries = null; - - - private ByteBuffer[] columnByteBuffers; - private int[] columnBufferSizes; - private int[] columnBufferOffsets; - - private long[] columnStartOffsets; - private long[] columnTotalOffsets; - private long[] columnTotalLengths; - - public TabularSubsetGenerator() { - - } - - public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException { - if (!datafile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - setVarCount(datafile.getDataTable().getVarQuantity().intValue()); - setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); - - - - StorageIO dataAccess = datafile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); + //private static int MAX_COLUMN_BUFFER = 8192; - File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); - - fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - - if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { - throw new IOException("Illegal number of variables in the subset request"); - } - - subsetcount = variables.size(); - columnTotalOffsets = new long[subsetcount]; - columnTotalLengths = new long[subsetcount]; - columnByteBuffers = new ByteBuffer[subsetcount]; - - + public TabularSubsetGenerator() { - if (subsetcount == 1) { - if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - 
columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder()); - columnTotalOffsets[0] = 0; - } else { - columnEntries = new byte[subsetcount][]; - - columnBufferSizes = new int[subsetcount]; - columnBufferOffsets = new int[subsetcount]; - columnStartOffsets = new long[subsetcount]; - - int i = 0; - for (DataVariable var : variables) { - if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder()); - columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder()); - if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) { - columnByteBuffers[i].limit((int)columnTotalLengths[i]); - } - fileChannel.position(columnStartOffsets[i]); - columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]); - columnBufferOffsets[i] = 0; - columnTotalOffsets[i] = columnBufferSizes[i]; - i++; - } - } - } - - private int getVarCount() { - return varcount; } - private void setVarCount(int varcount) { - this.varcount = varcount; - } - - private int getCaseCount() { - return casecount; - } - - private void setCaseCount(int casecount) { - this.casecount = casecount; - } - - - /* - * Note that this method operates on the *absolute* column number, i.e. - * the number of the physical column in the tabular file. This is stored - * in DataVariable.FileOrder. - * This "column number" should not be confused with the number of column - * in the subset request; a user can request any number of variable - * columns, in an order that doesn't have to follow the physical order - * of the columns in the file. - */ - private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnOffset; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - } else { - columnOffset = getVarCount() * 8; - } - return columnOffset; - } - - /* - * See the comment for the method above. + /** + * This class used to be much more complex. There were methods for subsetting + * from fixed-width field files; including using the optimized, "90 deg. rotated" + * versions of such files (i.e. you create a *columns-wise* copy of your data + * file in which the columns are stored sequentially, and a table of byte + * offsets of each column. You can then read individual variable columns + * for cheap; at the expense of doubling the storage size of your tabular + * data files. These methods were not used, so they were deleted (in Jan. 2024 + * prior to 6.2. + * Please consult git history if you are interested in looking at that code. 
*/ - private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnLength; - - if (column > 0) { - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnLength = columnEndOffsets[0] - varcount * 8; - } - - return columnLength; - } - - - private void bufferMoreColumnBytes(int column) throws IOException { - if (columnTotalOffsets[column] >= columnTotalLengths[column]) { - throw new IOException("attempt to buffer bytes past the column boundary"); - } - fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); - - columnByteBuffers[column].clear(); - if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { - dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); - columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); - } - columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); - dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); - columnBufferOffsets[column] = 0; - columnTotalOffsets[column] += columnBufferSizes[column]; - } - - public byte[] readColumnEntryBytes(int column) { - return readColumnEntryBytes(column, true); - } - - - public byte[] readColumnEntryBytes(int column, boolean addTabs) { - byte[] leftover = null; - byte[] ret = null; - - if (columnBufferOffsets[column] >= columnBufferSizes[column]) { - try { - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - } catch (IOException ioe) { - return null; - } - } - - int byteindex = columnBufferOffsets[column]; - try { - while (columnByteBuffers[column].array()[byteindex] != '\n') { - byteindex++; - if (byteindex == columnBufferSizes[column]) { - // save the leftover: - if (leftover == null) { - leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); - } else { - byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); - leftover = merged; - merged = null; - } - // read more bytes: - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - byteindex = 0; - } - } - - // presumably, we have found our '\n': - if (leftover == null) { - ret = new byte[byteindex - columnBufferOffsets[column] + 1]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); - } else { - ret = new byte[leftover.length + byteindex + 1]; - System.arraycopy(leftover, 0, ret, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); - } - - } catch (IOException ioe) { - return null; - } - - columnBufferOffsets[column] = (byteindex + 1); - - if (column < columnBufferOffsets.length - 1) { - ret[ret.length - 1] = '\t'; - } - return ret; - } - - public int readSingleColumnSubset(byte[] buffer) throws IOException { - if (columnTotalOffsets[0] == columnTotalLengths[0]) { - return -1; - } - 
- if (columnByteBuffers[0] == null) { - dbgLog.fine("allocating single column subset buffer."); - columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); - } - - int bytesread = fileChannel.read(columnByteBuffers[0]); - dbgLog.fine("single column subset: read "+bytesread+" bytes."); - if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { - bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); - } - System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); - - columnTotalOffsets[0] += bytesread; - columnByteBuffers[0].clear(); - return bytesread > 0 ? bytesread : -1; - } - - - public byte[] readSubsetLineBytes() throws IOException { - byte[] ret = null; - int total = 0; - for (int i = 0; i < subsetcount; i++) { - columnEntries[i] = readColumnEntryBytes(i); - if (columnEntries[i] == null) { - throw new IOException("Failed to read subset line entry"); - } - total += columnEntries[i].length; - } - - ret = new byte[total]; - int offset = 0; - for (int i = 0; i < subsetcount; i++) { - System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); - offset += columnEntries[i].length; - } - dbgLog.fine("line: "+new String(ret)); - return ret; - } - - - public void close() { - if (fileChannel != null) { - try { - fileChannel.close(); - } catch (IOException ioe) { - // don't care. - } - } - } - public void subsetFile(String infile, String outfile, List columns, Long numCases) { subsetFile(infile, outfile, columns, numCases, "\t"); } @@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo * files, OK to use on small files: */ - public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { + public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) { Double[] retVector = new Double[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { + public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) { Float[] retVector = new Float[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. 
*/ - public static Long[] subsetLongVector(InputStream in, int column, int numCases) { + public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) { Long[] retVector = new Long[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static String[] subsetStringVector(InputStream in, int column, int numCases) { + public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) { String[] retVector = new String[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas } - /* - * Straightforward method for subsetting a tab-delimited data file, extracting - * all the columns representing continuous variables and returning them as - * a 2-dimensional array of Doubles; - * Inefficient on large files, OK to use on small ones. - */ - public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { - Double[][] retVector = new Double[columns.size()][numCases]; - try (Scanner scanner = new Scanner(in)) { - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value - } - j++; - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); - } - tailIndex++; - } - - } - return retVector; - - } - - public String[] subsetStringVector(DataFile datafile, int column) throws IOException { - return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { - return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { - return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); - } - - // Float methods are temporary; - // In normal operations we'll be treating all the floating point types as - // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest - // tests. -- L.A. 
- - public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { - return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); - } - - public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); - } - - public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); - } - - public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { - if (!dataFile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - int varcount = dataFile.getDataTable().getVarQuantity().intValue(); - int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); - - if (column >= varcount) { - throw new IOException("Column "+column+" is out of bounds."); - } - - StorageIO dataAccess = dataFile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); - - if (columntype == COLUMN_TYPE_STRING) { - String filename = dataFile.getFileMetadata().getLabel(); - if (filename != null) { - filename = filename.replaceFirst("^_", ""); - Integer fnumvalue = null; - try { - fnumvalue = new Integer(filename); - } catch (Exception ex){ - fnumvalue = null; - } - if (fnumvalue != null) { - //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { - if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { - if (!(fnumvalue.intValue() == 60007 - || fnumvalue.intValue() == 59997 - || fnumvalue.intValue() == 60015 - || fnumvalue.intValue() == 59948 - || fnumvalue.intValue() == 60012 - || fnumvalue.intValue() == 52585 - || fnumvalue.intValue() == 60005 - || fnumvalue.intValue() == 60002 - || fnumvalue.intValue() == 59954 - || fnumvalue.intValue() == 60008 - || fnumvalue.intValue() == 54972 - || fnumvalue.intValue() == 55010 - || fnumvalue.intValue() == 54996 - || fnumvalue.intValue() == 53527 - || fnumvalue.intValue() == 53546 - || fnumvalue.intValue() == 55002 - || fnumvalue.intValue() == 55006 - || fnumvalue.intValue() == 54998 - || fnumvalue.intValue() == 52552 - // SPSS/SAV cases with similar issue - compat mode must be disabled - //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... - || fnumvalue.intValue() == 54618 // another SAV file, with long strings... - || fnumvalue.intValue() == 54619 // [same] - || fnumvalue.intValue() == 57983 - || fnumvalue.intValue() == 58262 - || fnumvalue.intValue() == 58288 - || fnumvalue.intValue() == 58656 - || fnumvalue.intValue() == 59144 - // || fnumvalue.intValue() == 69626 [nope!] 
- )) { - dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); - } - } - } - } + private static void skipFirstLine(Scanner scanner) { + if (!scanner.hasNext()) { + throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!"); } - - return subsetObjectVector(tabfile, column, varcount, casecount, columntype); - } - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); - } - - - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { - - Object[] retVector = null; - - boolean isString = false; - boolean isDouble = false; - boolean isLong = false; - boolean isFloat = false; - - //Locale loc = new Locale("en", "US"); - - if (columntype == COLUMN_TYPE_STRING) { - isString = true; - retVector = new String[casecount]; - } else if (columntype == COLUMN_TYPE_DOUBLE) { - isDouble = true; - retVector = new Double[casecount]; - } else if (columntype == COLUMN_TYPE_LONG) { - isLong = true; - retVector = new Long[casecount]; - } else if (columntype == COLUMN_TYPE_FLOAT){ - isFloat = true; - retVector = new Float[casecount]; - } else { - throw new IOException("Unsupported column type: "+columntype); - } - - File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); - long columnOffset = 0; - long columnLength = 0; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnOffset = varcount * 8; - columnLength = columnEndOffsets[0] - varcount * 8; - } - int caseindex = 0; - - try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), - StandardOpenOption.READ))) { - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 
4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount - byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); - token = new String(merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseindex] = null; - } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); - } else { - token = token.substring(0, 128); - // token = String.format(loc, "%.128s", token); - token = token.trim(); - // dbgLog.info("formatted and trimmed: "+token); - } - } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } - } - } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) - } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! 
- if (leftover == null) { - leftover = new byte[(int) bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); - } else { - if (byteoffset != 0) { - throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); - } - byte[] merged = new byte[leftover.length + (int) bytesRead]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); - // leftover = null; - leftover = merged; - merged = null; - } - byteoffset = 0; - - } - } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength - bytesReadTotal)); - } - } - - } - - if (caseindex != casecount) { - throw new IOException("Faile to read "+casecount+" tokens for column "+column); - //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); - } - - return retVector; - } - - private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - long[] byteOffsets = new long[varcount]; - - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { - - byte[] offsetHeader = new byte[varcount * 8]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - // System.out.println(byteOffsets[varindex]); - } - - } - - return byteOffsets; - } - - private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - File rotatedImageFile = new File(rotatedImageFileName); - if (rotatedImageFile.exists()) { - //System.out.println("Image already exists!"); - return rotatedImageFile; - } - - return generateRotatedImage(tabfile, varcount, casecount); - - } - - private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { - // TODO: throw exceptions if bad file, zero varcount, etc. ... - - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - - int MAX_OUTPUT_STREAMS = 32; - int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? - int MAX_COLUMN_BUFFER = 8 * 1024; - - // offsetHeader will contain the byte offsets of the individual column - // vectors in the final rotated image file - byte[] offsetHeader = new byte[varcount * 8]; - int[] bufferedSizes = new int[varcount]; - long[] cachedfileSizes = new long[varcount]; - File[] columnTempFiles = new File[varcount]; - - for (int i = 0; i < varcount; i++) { - bufferedSizes[i] = 0; - cachedfileSizes[i] = 0; - } - - // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is - // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) 
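
For reference only, since this code path is deleted by the patch: the ".90d" rotated image that the removed subsetObjectVector()/extractColumnOffsets() code relied on begins with a header of varcount 8-byte big-endian longs, each holding the end offset of one column vector, followed by the concatenated column bytes. A sketch of decoding that header, with a hypothetical class name:

import java.nio.ByteBuffer;

// Hypothetical sketch of reading the removed ".90d" offset header:
// varcount longs, each the end offset of the corresponding column vector.
class RotatedImageHeaderSketch {
    static long[] columnEndOffsets(byte[] offsetHeader, int varcount) {
        long[] offsets = new long[varcount];
        ByteBuffer buf = ByteBuffer.wrap(offsetHeader); // big-endian by default
        for (int varindex = 0; varindex < varcount; varindex++) {
            offsets[varindex] = buf.getLong();
        }
        return offsets;
    }
}
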
- - byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; - - // read the tab-delimited file: - - try (FileInputStream tabfileStream = new FileInputStream(tabfile); - Scanner scanner = new Scanner(tabfileStream)) { - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - try (BufferedOutputStream outputStream = new BufferedOutputStream( - new FileOutputStream(bufferTempFile, true))) { - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; - } - - } - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; - } - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - } - - // OK, we've created the individual byte vectors of the tab file columns; - // they may be partially saved in temp files and/or in memory. - // We now need to go through all these buffers and create the final - // rotated image file. - - try (BufferedOutputStream finalOut = new BufferedOutputStream( - new FileOutputStream(new File(rotatedImageFileName)))) { - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! 
- - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset += totalColumnBytes; - // totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - } - - // delete the temp file: - cachedTempFile.delete(); - - } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ - cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); - } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - - } - } - - return new File(rotatedImageFileName); - - } - - /* - * Test method for taking a "rotated" image, and reversing it, reassembling - * all the columns in the original order. Which should result in a file - * byte-for-byte identical file to the original tab-delimited version. - * - * (do note that this method is not efficiently implemented; it's only - * being used for experiments so far, to confirm the accuracy of the - * accuracy of generateRotatedImage(). It should not be used for any - * practical means in the application!) 
- */ - private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { - // open the file, read in the offset header: - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; - - - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); - } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ - - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; - } - bytecount++; - } - - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); - } - } - } - - } - - - } - - /** - * main() method, for testing - * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type - * make sure the CLASSPATH contains ... 
- * - */ - - public static void main(String[] args) { - - String tabFileName = args[0]; - int varcount = new Integer(args[1]).intValue(); - int casecount = new Integer(args[2]).intValue(); - int column = new Integer(args[3]).intValue(); - String type = args[4]; - - File tabFile = new File(tabFileName); - File rotatedImageFile = null; - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - - /* - try { - rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - */ - - //System.out.println("\nFinished generating \"rotated\" column image file."); - - //System.out.println("\nOffsets:"); - - MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); - String FORMAT_IEEE754 = "%+#.15e"; - - try { - //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); - //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - if ("string".equals(type)) { - String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - System.out.println(columns[i]); - } - } else { - - Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - if (columns[i] != null) { - BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); - System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); - } else { - System.out.println("NA"); - } - //System.out.println(columns[i]); - } - } - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - } -} - - + scanner.next(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java deleted file mode 100644 index 89e033353c1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -/** - * - * @author Leonid Andreev - */ -public class TabularSubsetInputStream extends InputStream { - private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName()); - - private TabularSubsetGenerator subsetGenerator = null; - private int numberOfSubsetVariables; - private int numberOfObservations; - private int numberOfObservationsRead = 0; - private byte[] leftoverBytes = null; - - public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException { - if (datafile == null) { - throw new IOException("Null datafile in subset request"); - } - if (!datafile.isTabularData()) { - throw new IOException("Subset requested on a non-tabular data file"); - } - numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue(); - - if (variables == null || variables.size() < 1) { - throw new IOException("Null or empty list of variables in subset request."); - } - numberOfSubsetVariables = variables.size(); - subsetGenerator = new TabularSubsetGenerator(datafile, variables); - - } - - //@Override - public int read() throws IOException { - throw new IOException("read() method not implemented; do not use."); - } - - //@Override - public int read(byte[] b) throws IOException { - // TODO: - // Move this code into TabularSubsetGenerator - logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;"); - - if (numberOfSubsetVariables == 1) { - logger.fine("calling the single variable subset read method"); - return subsetGenerator.readSingleColumnSubset(b); - } - - int bytesread = 0; - byte [] linebuffer; - - // do we have a leftover? - if (leftoverBytes != null) { - if (leftoverBytes.length < b.length) { - System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length); - bytesread = leftoverBytes.length; - leftoverBytes = null; - - } else { - // shouldn't really happen... unless it's a very large subset, - // or a very long string, etc. - System.arraycopy(leftoverBytes, 0, b, 0, b.length); - byte[] tmp = new byte[leftoverBytes.length - b.length]; - System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length); - leftoverBytes = tmp; - tmp = null; - return b.length; - } - } - - while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) { - linebuffer = subsetGenerator.readSubsetLineBytes(); - numberOfObservationsRead++; - - if (bytesread + linebuffer.length < b.length) { - // copy linebuffer into the return buffer: - System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length); - bytesread += linebuffer.length; - } else { - System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread); - // save the leftover; - if (bytesread + linebuffer.length > b.length) { - leftoverBytes = new byte[bytesread + linebuffer.length - b.length]; - System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length); - } - return b.length; - } - } - - // and this means we've reached the end of the tab file! - - return bytesread > 0 ? 
bytesread : -1; - } - - //@Override - public void close() { - if (subsetGenerator != null) { - subsetGenerator.close(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java index 5119b4b96c7..edd01ae98a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java @@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set excludedFieldSe List vars = variableService.findByDataTableId(dt.getId()); if (checkField("catgry", excludedFieldSet, includedFieldSet)) { if (checkIsWithoutFrequencies(vars)) { + // @todo: the method called here to calculate frequencies + // when they are missing from the database (for whatever + // reasons) subsets the physical tab-delimited file and + // calculates them in real time. this is very expensive operation + // potentially. let's make sure that, when we do this, we + // save the resulting frequencies in the database, so that + // we don't have to do this again. Also, let's double check + // whether the "checkIsWithoutFrequencies()" method is doing + // the right thing - as it appears to return true when there + // are no categorical variables in the DataTable (?) calculateFrequencies(df, vars); } } @@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List vars) { private void calculateFrequencies(DataFile df, List vars) { + // @todo: see the comment in the part of the code that calls this method try { DataConverter dc = new DataConverter(); File tabFile = dc.downloadFromStorageIO(df.getStorageIO()); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 233f746fb17..9bacafd173f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -726,27 +726,17 @@ public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFil } public void produceContinuousSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { - - /* - // quick, but memory-inefficient way: - // - this method just loads the entire file-worth of continuous vectors - // into a Double[][] matrix. 
- //Double[][] variableVectors = subsetContinuousVectors(dataFile); - //calculateContinuousSummaryStatistics(dataFile, variableVectors); - - // A more sophisticated way: this subsets one column at a time, using - // the new optimized subsetting that does not have to read any extra - // bytes from the file to extract the column: - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - */ for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) { if (dataFile.getDataTable().getDataVariables().get(i).isIntervalContinuous()) { logger.fine("subsetting continuous vector"); if ("float".equals(dataFile.getDataTable().getDataVariables().get(i).getFormat())) { - Float[] variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Float[] variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Float vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -754,7 +744,11 @@ public void produceContinuousSummaryStatistics(DataFile dataFile, File generated calculateUNF(dataFile, i, variableVector); variableVector = null; } else { - Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Double vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -776,7 +770,11 @@ public void produceDiscreteNumericSummaryStatistics(DataFile dataFile, File gene && dataFile.getDataTable().getDataVariables().get(i).isTypeNumeric()) { logger.fine("subsetting discrete-numeric vector"); - Long[] variableVector = TabularSubsetGenerator.subsetLongVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Long[] variableVector = TabularSubsetGenerator.subsetLongVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); // We are discussing calculating the same summary stats for // all numerics (the same kind of sumstats that we've been calculating // for numeric continuous type) -- L.A. Jul. 
2014 @@ -810,7 +808,11 @@ public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedT if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) { logger.fine("subsetting character vector"); - String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + String[] variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); //calculateCharacterSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: logger.fine("Calculating UNF on a String vector"); @@ -828,20 +830,29 @@ public static void produceFrequencyStatistics(DataFile dataFile, File generatedT produceFrequencies(generatedTabularFile, vars); } - public static void produceFrequencies( File generatedTabularFile, List vars) throws IOException { + public static void produceFrequencies(File generatedTabularFile, List vars) throws IOException { for (int i = 0; i < vars.size(); i++) { Collection cats = vars.get(i).getCategories(); int caseQuantity = vars.get(i).getDataTable().getCaseQuantity().intValue(); boolean isNumeric = vars.get(i).isTypeNumeric(); + boolean skipVariableHeaderLine = vars.get(i).getDataTable().isStoredWithVariableHeader(); Object[] variableVector = null; if (cats.size() > 0) { if (isNumeric) { - variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } else { - variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } if (variableVector != null) { Hashtable freq = calculateFrequency(variableVector); @@ -923,6 +934,7 @@ public boolean ingestAsTabular(Long datafile_id) { DataFile dataFile = fileService.find(datafile_id); boolean ingestSuccessful = false; boolean forceTypeCheck = false; + boolean storingWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders(); // Never attempt to ingest a file that's already ingested! 
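
As a rough illustration of what calculateFrequency() is asked to do with the column vectors produced above (the class and method names here are hypothetical, not the project's API): count occurrences of each non-null value in a subsetted column.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of per-category frequency counting over one column
// vector returned by the subsetters; nulls are treated as missing values.
class FrequencySketch {
    static Map<Object, Long> frequencies(Object[] columnVector) {
        Map<Object, Long> freq = new HashMap<>();
        for (Object value : columnVector) {
            if (value != null) {
                freq.merge(value, 1L, Long::sum);
            }
        }
        return freq;
    }
}
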
if (dataFile.isTabularData()) { @@ -1024,11 +1036,7 @@ public boolean ingestAsTabular(Long datafile_id) { TabularDataIngest tabDataIngest = null; try { - if (additionalData != null) { - tabDataIngest = ingestPlugin.read(inputStream, additionalData); - } else { - tabDataIngest = ingestPlugin.read(inputStream, null); - } + tabDataIngest = ingestPlugin.read(inputStream, storingWithVariableHeader, additionalData); } catch (IOException ingestEx) { dataFile.SetIngestProblem(); FileUtil.createIngestFailureReport(dataFile, ingestEx.getMessage()); @@ -1081,6 +1089,7 @@ public boolean ingestAsTabular(Long datafile_id) { dataFile.setDataTable(tabDataIngest.getDataTable()); tabDataIngest.getDataTable().setDataFile(dataFile); tabDataIngest.getDataTable().setOriginalFileName(originalFileName); + dataFile.getDataTable().setStoredWithVariableHeader(storingWithVariableHeader); try { produceSummaryStatistics(dataFile, tabFile); @@ -1172,6 +1181,7 @@ public boolean ingestAsTabular(Long datafile_id) { // Replace contents of the file with the tab-delimited data produced: dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); + // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); @@ -2297,7 +2307,7 @@ public static void main(String[] args) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { System.err.println("Caught an exception trying to ingest file "+file+"."); System.exit(1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java index 223b171dfb5..0f23a3d9781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java @@ -20,10 +20,13 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.*; //import edu.harvard.iq.dataverse.ingest.plugin.metadata.*; import java.io.*; import static java.lang.System.*; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; /** @@ -98,7 +101,7 @@ public void setDataLanguageEncoding(String dataLanguageEncoding) { * * @throws java.io.IOException if a reading error occurs. 
      */
-    public abstract TabularDataIngest read(BufferedInputStream stream, File dataFile)
+    public abstract TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile)
         throws IOException;
 
 
@@ -176,5 +179,26 @@ protected String escapeCharacterString(String rawString) {
 
         return escapedString;
     }
+
+    protected String generateVariableHeader(List<DataVariable> dvs) {
+        String varHeader = null;
+
+        if (dvs != null) {
+            Iterator<DataVariable> iter = dvs.iterator();
+            DataVariable dv;
+
+            if (iter.hasNext()) {
+                dv = iter.next();
+                varHeader = dv.getName();
+            }
+
+            while (iter.hasNext()) {
+                dv = iter.next();
+                varHeader = varHeader + "\t" + dv.getName();
+            }
+        }
+
+        return varHeader;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
index 57f76df3802..f8816ababb4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
@@ -110,7 +110,7 @@ private void init() throws IOException {
      * @throws java.io.IOException if a reading error occurs.
      */
     @Override
-    public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException {
+    public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException {
         init();
 
         if (stream == null) {
@@ -124,7 +124,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws
         File tabFileDestination = File.createTempFile("data-", ".tab");
         PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath());
 
-        int lineCount = readFile(localBufferedReader, dataTable, tabFileWriter);
+        int lineCount = readFile(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter);
 
         logger.fine("Tab file produced: " + tabFileDestination.getAbsolutePath());
 
@@ -136,14 +136,17 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws
 
     }
 
-    public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {
+    public int readFile(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter finalOut) throws IOException {
 
         List<DataVariable> variableList = new ArrayList<>();
         CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
         Map<String, Integer> headers = parser.getHeaderMap();
 
         int i = 0;
+        String variableNameHeader = null;
+
         for (String varName : headers.keySet()) {
+            // @todo: is .keySet() guaranteed to return the names in the right order?
             if (varName == null || varName.isEmpty()) {
                 // TODO:
                 // Add a sensible variable name validation algorithm.
@@ -158,6 +161,13 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f
             dv.setTypeCharacter();
             dv.setIntervalDiscrete();
+
+            if (saveWithVariableHeader) {
+                variableNameHeader = variableNameHeader == null
+                        ? 
varName + : variableNameHeader.concat("\t" + varName); + } + i++; } @@ -342,6 +352,14 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; + + // Save the variable name header, if requested + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException("failed to generate the Variable Names header"); + } + finalOut.println(variableNameHeader); + } for (CSVRecord record : parser) { if (!record.isConsistent()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 2dec701592e..73818f8fb62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -505,7 +505,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { dbgLog.info("***** DTAFileReader: read() start *****"); if (dataFile != null) { @@ -519,7 +519,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws if (releaseNumber!=104) { decodeExpansionFields(stream); } - decodeData(stream); + decodeData(stream, storeWithVariableHeader); decodeValueLabels(stream); ingesteddata.setDataTable(dataTable); @@ -1665,7 +1665,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx dbgLog.fine("parseValueLabelsRelease108(): end"); } - private void decodeData(BufferedInputStream stream) throws IOException { + private void decodeData(BufferedInputStream stream, boolean saveWithVariableHeader) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); @@ -1719,6 +1719,11 @@ private void decodeData(BufferedInputStream stream) throws IOException { BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; + + // add the variable header here, if needed + if (saveWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 22581834676..53607d541de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -339,7 +339,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { logger.fine("NewDTAFileReader: read() start"); // shit ton of diagnostics (still) needed here!! -- L.A. 
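
The variable-name header line that these readers now optionally emit is simply the variable names joined by tabs, as built by generateVariableHeader() above. An equivalent sketch, with made-up variable names in the usage comment:

import java.util.List;

// Illustrative equivalent of the tab-joined variable-name header line.
class VariableHeaderSketch {
    static String header(List<String> variableNames) {
        if (variableNames == null || variableNames.isEmpty()) {
            return null;
        }
        return String.join("\t", variableNames);
    }
    // e.g. header(List.of("id", "age", "income")) yields "id\tage\tincome"
}
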
@@ -363,7 +363,13 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws // "characteristics" - STATA-proprietary information // (we are skipping it) readCharacteristics(dataReader); - readData(dataReader); + + String variableHeaderLine = null; + + if (storeWithVariableHeader) { + variableHeaderLine = generateVariableHeader(dataTable.getDataVariables()); + } + readData(dataReader, variableHeaderLine); // (potentially) large, (potentially) non-ASCII character strings // saved outside the section, and referenced @@ -707,7 +713,7 @@ private void readCharacteristics(DataReader reader) throws IOException { } - private void readData(DataReader reader) throws IOException { + private void readData(DataReader reader, String variableHeaderLine) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data()); logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); @@ -731,6 +737,11 @@ private void readData(DataReader reader) throws IOException { FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + // add the variable header here, if needed + if (variableHeaderLine != null) { + pwout.println(variableHeaderLine); + } + logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { @@ -999,6 +1010,8 @@ private void readSTRLs(DataReader reader) throws IOException { int nobs = dataTable.getCaseQuantity().intValue(); String[] line; + + //@todo: adjust for the case of storing the file with the variable header for (int obsindex = 0; obsindex < nobs; obsindex++) { if (scanner.hasNext()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index c90b0ea6950..2ee966c3e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -180,7 +180,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File additionalData) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{ dbgLog.fine("PORFileReader: read() start"); if (additionalData != null) { @@ -226,7 +226,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t headerId = "8S"; } - decode(headerId, bfReader); + decode(headerId, bfReader, storeWithVariableHeader); // for last iteration @@ -382,7 +382,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t return ingesteddata; } - private void decode(String headerId, BufferedReader reader) throws IOException{ + private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{ if (headerId.equals("1")) decodeProductName(reader); else if (headerId.equals("2")) decodeLicensee(reader); else if (headerId.equals("3")) decodeFileLabel(reader); @@ -398,7 +398,7 @@ private void decode(String headerId, BufferedReader reader) throws IOException{ else if (headerId.equals("C")) decodeVariableLabel(reader); else if (headerId.equals("D")) decodeValueLabel(reader); else if (headerId.equals("E")) decodeDocument(reader); - else if 
(headerId.equals("F")) decodeData(reader); + else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader); } @@ -1099,7 +1099,7 @@ private void decodeDocument(BufferedReader reader) throws IOException { } - private void decodeData(BufferedReader reader) throws IOException { + private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeData(): start"); // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta int[] variableTypeFinal= new int[varQnty]; @@ -1126,6 +1126,9 @@ private void decodeData(BufferedReader reader) throws IOException { // contents (variable) checker concering decimals Arrays.fill(variableTypeFinal, 0); + if (storeWithVariableHeader) { + pwout.println(StringUtils.join(variableNameList, "\t")); + } // raw-case counter int j = 0; // case diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index eb1353fd792..50f2f89e354 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -473,7 +473,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); @@ -509,7 +509,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); - int lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter); + int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index f60b7733463..fbe7e401b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -61,8 +61,8 @@ public RTabFileParser (char delimiterChar) { // should be used. - public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout) throws IOException { - dbgLog.warning("RTabFileParser: Inside R Tab file parser"); + public int read(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter pwout) throws IOException { + dbgLog.fine("RTabFileParser: Inside R Tab file parser"); int varQnty = 0; @@ -94,14 +94,17 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout boolean[] isTimeVariable = new boolean[varQnty]; boolean[] isBooleanVariable = new boolean[varQnty]; + String variableNameHeader = null; + if (dataTable.getDataVariables() != null) { for (int i = 0; i < varQnty; i++) { DataVariable var = dataTable.getDataVariables().get(i); if (var == null) { - // throw exception! 
+ throw new IOException ("null dataVariable passed to the parser"); + } if (var.getType() == null) { - // throw exception! + throw new IOException ("null dataVariable type passed to the parser"); } if (var.isTypeCharacter()) { isCharacterVariable[i] = true; @@ -128,13 +131,24 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout } } } else { - // throw excepion "unknown variable format type" - ? + throw new IOException ("unknown dataVariable format passed to the parser"); } - + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? var.getName() + : variableNameHeader.concat("\t" + var.getName()); + } } } else { - // throw exception! + throw new IOException ("null dataVariables list passed to the parser"); + } + + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException ("failed to generate the Variable Names header"); + } + pwout.println(variableNameHeader); } while ((line = csvReader.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 682b8f1166c..5eecbdfb666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -338,7 +338,7 @@ private void init() throws IOException { } } - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{ dbgLog.info("SAVFileReader: read() start"); if (dataFile != null) { @@ -422,7 +422,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws methodCurrentlyExecuted = "decodeRecordTypeData"; dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData"); - decodeRecordTypeData(stream); + decodeRecordTypeData(stream, storeWithVariableHeader); } catch (IllegalArgumentException e) { @@ -2308,7 +2308,7 @@ void decodeRecordType999(BufferedInputStream stream) throws IOException { - void decodeRecordTypeData(BufferedInputStream stream) throws IOException { + void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeRecordTypeData(): start"); ///String fileUnfValue = null; @@ -2320,9 +2320,9 @@ void decodeRecordTypeData(BufferedInputStream stream) throws IOException { throw new IllegalArgumentException("stream == null!"); } if (isDataSectionCompressed){ - decodeRecordTypeDataCompressed(stream); + decodeRecordTypeDataCompressed(stream, storeWithVariableHeader); } else { - decodeRecordTypeDataUnCompressed(stream); + decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader); } /* UNF calculation was here... 
*/ @@ -2362,7 +2362,7 @@ PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException { } - void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); @@ -2395,7 +2395,10 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; - + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get( @@ -2947,7 +2950,7 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti } - void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****"); if (stream ==null){ @@ -3013,6 +3016,11 @@ void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOExcep ///dataTable2 = new Object[varQnty][caseQnty]; // storage of date formats to pass to UNF ///dateFormats = new String[varQnty][caseQnty]; + + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } try { for (int i = 0; ; i++){ // case-wise loop diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index ea3f3868f24..ef91793690e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; @@ -81,7 +80,9 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. 
*/ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { + // @todo: implement handling of "saveWithVariableHeader" option + init(); TabularDataIngest ingesteddata = new TabularDataIngest(); @@ -118,6 +119,10 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws String[] caseRow = new String[varQnty]; String[] valueTokens; + // add the variable header here, if needed + if (storeWithVariableHeader) { + finalWriter.println(generateVariableHeader(dataTable.getDataVariables())); + } while ((line = secondPassReader.readLine()) != null) { // chop the line: @@ -549,7 +554,7 @@ public static void main(String[] args) throws Exception { BufferedInputStream xlsxInputStream = new BufferedInputStream(new FileInputStream(new File(args[0]))); - TabularDataIngest dataIngest = testReader.read(xlsxInputStream, null); + TabularDataIngest dataIngest = testReader.read(xlsxInputStream, false, null); dataTable = dataIngest.getDataTable(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 627cef08d8b..3b7632f3d9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -598,7 +598,12 @@ Whether Harvesting (OAI) service is enabled * Allows an instance admin to disable Solr search facets on the collection * and dataset pages instantly */ - DisableSolrFacets + DisableSolrFacets, + /** + * When ingesting tabular data files, store the generated tab-delimited + * files *with* the variable names line up top. + */ + StoreIngestedTabularFilesWithVarHeaders ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 3c6992f8ec3..ded394833f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1173,4 +1173,12 @@ public boolean isStorageQuotasEnforced() { public Long getTestStorageQuotaLimit() { return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.StorageQuotaSizeInBytes); } + /** + * Should we store tab-delimited files produced during ingest *with* the + * variable name header line included? + * @return boolean - defaults to false. 
+ */ + public boolean isStoringIngestedFilesWithHeaders() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders, false); + } } diff --git a/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql new file mode 100644 index 00000000000..7c52a00107a --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql @@ -0,0 +1 @@ +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS storedWithVariableHeader BOOLEAN DEFAULT FALSE; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 915f82a6de2..cfc6f9335b3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -16,6 +16,7 @@ import io.restassured.path.xml.XmlPath; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.io.IOException; @@ -33,6 +34,8 @@ import jakarta.json.JsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.*; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import org.junit.jupiter.api.AfterAll; @@ -2483,4 +2486,129 @@ public void testCollectionStorageQuotas() { UtilIT.deleteSetting(SettingsServiceBean.Key.UseStorageQuotas); } + + @Test + public void testIngestWithAndWithoutVariableHeader() throws NoSuchAlgorithmException { + msgt("testIngestWithAndWithoutVariableHeader"); + + // The compact Stata file we'll be using for this test: + // (this file is provided by Stata inc. 
- it's genuine quality) + String pathToFile = "scripts/search/data/tabular/stata13-auto.dta"; + // The pre-calculated MD5 signature of the *complete* tab-delimited + // file as seen by the final Access API user (i.e., with the variable + // header line in it): + String tabularFileMD5 = "f298c2567cc8eb544e36ad83edf6f595"; + // Expected byte sizes of the generated tab-delimited file as stored, + // with and without the header: + int tabularFileSizeWoutHeader = 4026; + int tabularFileSizeWithHeader = 4113; + + String apiToken = createUserGetToken(); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetIdA = createDatasetGetId(dataverseAlias, apiToken); + + // Before we do anything else, make sure that the instance is configured + // the "old" way, i.e., to store ingested files without the headers: + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Response addResponse = UtilIT.uploadFileViaNative(datasetIdA.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdA = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdA); + + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdA.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(A)"); + + // Check the metadata to confirm that the file has ingested: + + Response fileDataResponse = UtilIT.getFileData(fileIdA.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWoutHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum: + + Response fileDownloadResponse = UtilIT.downloadFile(fileIdA.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + byte[] fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + messageDigest.update(fileDownloadBytes); + byte[] rawDigestBytes = messageDigest.digest(); + String tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved without the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // Repeat the whole thing, in another dataset (because we will be uploading + // an identical file), but with the "store with the header setting enabled): + + UtilIT.enableSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Integer datasetIdB = createDatasetGetId(dataverseAlias, apiToken); + + addResponse = UtilIT.uploadFileViaNative(datasetIdB.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdB = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdB); 
+ + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdB.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(B)"); + + // Check the metadata to confirm that the file has ingested: + + fileDataResponse = UtilIT.getFileData(fileIdB.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWithHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum, again + + fileDownloadResponse = UtilIT.downloadFile(fileIdB.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + messageDigest.reset(); + messageDigest.update(fileDownloadBytes); + rawDigestBytes = messageDigest.digest(); + tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved with the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // In other words, whether the file was saved with, or without the header, + // as downloaded by the user, the end result must be the same in both cases! + // In other words, whether that first line with the variable names is already + // in the physical file, or added by Dataverse on the fly, the downloaded + // content must be identical. + + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + // @todo: cleanup? + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java index 96e314324ab..ca64bcc794f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java @@ -99,7 +99,7 @@ private DataFile readFileCalcFreq(String fileName, String type ) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { tabDataIngest = null; System.out.println("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java index fc066ef195e..9afb35918a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java @@ -52,7 +52,7 @@ public void testRead() { try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - File outFile = instance.read(stream, null).getTabDelimitedFile(); + File outFile = instance.read(stream, false, null).getTabDelimitedFile(); result = new BufferedReader(new FileReader(outFile)); logger.fine("Final pass: " + outFile.getPath()); } catch (IOException ex) { @@ -104,7 +104,7 @@ public void testVariables() { try 
(BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - result = instance.read(stream, null).getDataTable(); + result = instance.read(stream, false, null).getDataTable(); } catch (IOException ex) { fail("" + ex); } @@ -154,7 +154,7 @@ public void testSubset() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -195,7 +195,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(floatVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -229,7 +229,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(longVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -256,7 +256,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(stringVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -298,7 +298,7 @@ public void testVariableUNFs() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -327,7 +327,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); } catch (IOException | UnfException ioex) { @@ -345,7 +345,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); @@ -363,7 +363,7 
@@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); String[] dateFormats = null; @@ -401,7 +401,7 @@ public void testVariableUNFs() { public void testBrokenCSV() { String brokenFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/BrokenCSV.csv"; try { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, false, null); fail("IOException not thrown on null csv"); } catch (NullPointerException ex) { String expMessage = null; @@ -412,7 +412,7 @@ public void testBrokenCSV() { } try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(brokenFile))) { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, false, null); fail("IOException was not thrown when collumns do not align."); } catch (IOException ex) { String expMessage = BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java index 113e9be6b54..8af36d6466d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java @@ -16,7 +16,7 @@ public class DTAFileReaderTest { @Test public void testOs() throws IOException { - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("rel_8_or_9", result.getDataTable().getOriginalFormatVersion()); assertEquals(50, result.getDataTable().getDataVariables().size()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java index c963346b05e..0f14054f472 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java @@ -25,7 +25,7 @@ public void testAuto() throws IOException { instance = new NewDTAFileReader(null, 117); // From https://www.stata-press.com/data/r13/auto.dta // `strings` shows "
<stata_dta><header><release>117</release>"
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), false, nullDataFile);
         assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat());
         assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion());
         assertEquals(12, result.getDataTable().getDataVariables().size());
@@ -39,7 +39,7 @@ public void testAuto() throws IOException {
     @Test
     public void testStrl() throws IOException {
         instance = new NewDTAFileReader(null, 118);
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), false, nullDataFile);
         DataTable table = result.getDataTable();
         assertEquals("application/x-stata", table.getOriginalFileFormat());
         assertEquals("STATA 14", table.getOriginalFormatVersion());
@@ -58,7 +58,7 @@ public void testStrl() throws IOException {
     @Test
     public void testDates() throws IOException {
         instance = new NewDTAFileReader(null, 118);
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), false, nullDataFile);
         DataTable table = result.getDataTable();
         assertEquals("application/x-stata", table.getOriginalFileFormat());
         assertEquals("STATA 14", table.getOriginalFormatVersion());
@@ -77,7 +77,7 @@ public void testDates() throws IOException {
     @Test
     void testNull() {
         instance = new NewDTAFileReader(null, 117);
-        assertThrows(IOException.class, () -> instance.read(null, new File("")));
+        assertThrows(IOException.class, () -> instance.read(null, false, new File("")));
     }

     // TODO: Can we create a small file to check into the code base that exercises the value-label names non-zero offset issue?
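
Every read() call in these test hunks now passes the new storeWithVariableHeader boolean explicitly (false). The production call site that decides the real value is not part of this excerpt; purely as an illustration, and with the caveat that everything here other than SystemConfig.isStoringIngestedFilesWithHeaders() and the three-argument read() signature is an assumption rather than code from this patch, the ingest code would plausibly consult the new flag like this:

    import java.io.BufferedInputStream;
    import java.io.File;
    import java.io.IOException;

    // Package locations are assumptions based on the paths visible in this patch.
    import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
    import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
    import edu.harvard.iq.dataverse.util.SystemConfig;

    public class IngestCallSiteSketch {

        // Hypothetical helper, not part of this patch: it only illustrates how the
        // new SystemConfig.isStoringIngestedFilesWithHeaders() flag (added above)
        // could be threaded into the reworked
        // read(stream, storeWithVariableHeader, dataFile) signature that every
        // reader touched by this patch now implements.
        public TabularDataIngest ingestWithConfiguredHeader(TabularDataFileReader ingestPlugin,
                                                            BufferedInputStream stream,
                                                            File additionalData,
                                                            SystemConfig systemConfig) throws IOException {
            boolean storeWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders();
            return ingestPlugin.read(stream, storeWithVariableHeader, additionalData);
        }
    }
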
@@ -87,7 +87,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { instance = new NewDTAFileReader(null, 117); // https://dataverse.harvard.edu/file.xhtml?fileId=2865667 Stata 13 HouseImputingCivilRightsInfo.dta md5=7dd144f27cdb9f8d1c3f4eb9c4744c42 - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(5, result.getDataTable().getDataVariables().size()); @@ -107,7 +107,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { public void testFirstCategoryNonZeroOffset1() throws IOException { instance = new NewDTAFileReader(null, 118); // https://dataverse.harvard.edu/file.xhtml?fileId=3140457 Stata 14: 2018_04_06_Aggregated_dataset_v2.dta - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 14", result.getDataTable().getOriginalFormatVersion()); assertEquals(227, result.getDataTable().getDataVariables().size()); @@ -136,7 +136,7 @@ public void test33k() throws IOException { @Test public void testCharacteristics() throws IOException { instance = new NewDTAFileReader(null, 117); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(441, result.getDataTable().getDataVariables().size());
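
The SAV and XLSX hunks above both call generateVariableHeader(dataTable.getDataVariables()), but that helper's implementation is not part of this excerpt. A minimal sketch, assuming it does no more than tab-join the variable names, which is exactly what the CSV reader hunk builds by hand as variableNameHeader:

    import java.util.List;
    import java.util.stream.Collectors;

    import edu.harvard.iq.dataverse.datavariable.DataVariable;

    public class VariableHeaderSketch {

        // Sketch only: the real generateVariableHeader(...) lives in the shared
        // reader code and may differ in detail. This version mirrors the CSV
        // reader hunk, which concatenates the variable names separated by tabs
        // ("name1\tname2\t...") and returns null for an empty variable list.
        public static String generateVariableHeader(List<DataVariable> dataVariables) {
            if (dataVariables == null || dataVariables.isEmpty()) {
                return null;
            }
            return dataVariables.stream()
                    .map(DataVariable::getName)
                    .collect(Collectors.joining("\t"));
        }
    }

Whatever its exact form, that single header line (plus its trailing newline) is what accounts for the 87-byte difference between tabularFileSizeWithHeader (4113) and tabularFileSizeWoutHeader (4026) asserted in the new FilesIT test, while the MD5 of the downloaded file stays the same in both configurations.
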