From b5fe4c75944deea540b807021e0f88401878118f Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 17 Mar 2023 16:22:48 +0100 Subject: [PATCH 001/546] Fix placement of allowedApiCalls in example manifests allowedApiCalls should be at the top level, not inside toolParameters. --- .../external-tools/dynamicDatasetTool.json | 20 +++++++++---------- .../root/external-tools/fabulousFileTool.json | 18 ++++++++--------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index 47413c8a625..22dd6477cb4 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -14,14 +14,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDatasetJson", - "httpMethod":"GET", - "urlTemplate":"/api/v1/datasets/{datasetId}", - "timeOut":10 - } - ] - } + ] + }, + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 1c132576099..2b6a0b8e092 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -21,14 +21,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDataFile", - "httpMethod":"GET", - "urlTemplate":"/api/v1/access/datafile/{fileId}", - "timeOut":270 - } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] } From d76092c1ec57a835920b8fd10e6883299f8b6d3a Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 17 Mar 2023 16:24:41 +0100 Subject: [PATCH 002/546] Add missing break to DATASET case Without this it also evaluates the FILE case causing NPE when dataFile is accessed. 
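For illustration, a minimal sketch of the fall-through pattern this patch fixes, using simplified, hypothetical names (Scope, buildCallback, datasetId, fileId) rather than the actual ExternalToolHandler code:

    // Illustrative sketch only; in the real handler the FILE case dereferences dataFile,
    // which is null when the tool was invoked for a dataset.
    enum Scope { DATASET, FILE }

    static String buildCallback(Scope scope, Long datasetId, Long fileId) {
        String callback = null;
        switch (scope) {
        case DATASET:
            callback = "/api/v1/datasets/" + datasetId + "/toolparams";
            break; // the missing break: without it, control falls through into the FILE case
        case FILE:
            // fileId.toString() throws a NullPointerException when fileId is null,
            // mirroring the NPE on dataFile described above
            callback = "/api/v1/files/" + fileId.toString() + "/toolparams";
            break;
        }
        return callback;
    }

With the break in place, buildCallback(Scope.DATASET, 42L, null) returns the dataset callback; without it, the same call falls through and throws a NullPointerException at fileId.toString().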
--- .../harvard/iq/dataverse/externaltools/ExternalToolHandler.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index 88a51017b75..dac046373ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -111,6 +111,7 @@ public String handleRequest(boolean preview) { case DATASET: callback=SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + dataset.getId() + "/versions/:latest/toolparams/" + externalTool.getId(); + break; case FILE: callback= SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/files/" + dataFile.getId() + "/metadata/" + fileMetadata.getId() + "/toolparams/" From ecac37fbd64c83bfc8d045ae3204ab86dc7bc29d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 2 May 2023 10:52:13 -0400 Subject: [PATCH 003/546] initial Globus Store class with some quick test code --- pom.xml | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 655 ++++++++++++++++++ .../dataaccess/RemoteOverlayAccessIO.java | 34 +- .../iq/dataverse/settings/JvmSettings.java | 2 + 4 files changed, 680 insertions(+), 18 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java diff --git a/pom.xml b/pom.xml index 5f514819947..e5b191f0ed7 100644 --- a/pom.xml +++ b/pom.xml @@ -167,8 +167,13 @@ org.eclipse.microprofile.config microprofile-config-api - provided + + + org.apache.geronimo.config + geronimo-config-impl + 1.0 + jakarta.platform jakarta.jakartaee-api diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java new file mode 100644 index 00000000000..fe62e25ad6f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -0,0 +1,655 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.protocol.HttpClientContext; +import 
org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import javax.net.ssl.SSLContext; + +/** + * @author qqmyers + * @param what it stores + */ +/* + * Globus Overlay Driver + * + * StorageIdentifier format: + * :///// + */ +public class GlobusOverlayAccessIO extends StorageIO { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); + + private StorageIO baseStore = null; + private String path = null; + private String endpointWithBasePath = null; + + private static HttpClientContext localContext = HttpClientContext.create(); + private PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + private int timeout = 1200; + private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + private static boolean trustCerts = false; + private int httpConcurrency = 4; + + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + this.setIsLocalFile(false); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); + } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + super(null, null, driverId); + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } + + private void validatePath(String relPath) throws IOException { + try { + URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); + if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + } + } catch(URISyntaxException use) { + throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); + } + } + + + @Override + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + // Fix new DataFiles: DataFiles that have not yet been saved may use this method + // when they don't have their storageidentifier in the final form + // So we fix it up here. ToDo: refactor so that storageidentifier is generated + // by the appropriate StorageIO class and is final from the start. + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(getSizeFromGlobus()); + } + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + } else { + this.setSize(getSizeFromGlobus()); + } + } + + private long getSizeFromGlobus() { + throw new NotImplementedException(); + /* + long size = -1; + HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + try { + CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); + + try { + int code = response.getStatusLine().getStatusCode(); + logger.fine("Response for HEAD: " + code); + switch (code) { + case 200: + Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); + logger.fine("Num headers: " + headers.length); + String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + logger.fine("Content-Length: " + sizeString); + size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + logger.fine("Found file size: " + size); + break; + default: + logger.warning("Response from " + head.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (IOException e) { + logger.warning(e.getMessage()); + } + return size; + */ + } + + @Override + public InputStream getInputStream() throws IOException { + if (super.getInputStream() == null) { + try { + HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 
200: + setInputStream(response.getEntity().getContent()); + break; + default: + logger.warning("Response from " + get.getURI().toString() + " was " + code); + throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + e.printStackTrace(); + throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); + + } + setChannel(Channels.newChannel(super.getInputStream())); + } + return super.getInputStream(); + } + + @Override + public Channel getChannel() throws IOException { + if (super.getChannel() == null) { + getInputStream(); + } + return channel; + } + + @Override + public ReadableByteChannel getReadChannel() throws IOException { + // Make sure StorageIO.channel variable exists + getChannel(); + return super.getReadChannel(); + } + + @Override + public void delete() throws IOException { + // Delete is best-effort - we tell the remote server and it may or may not + // implement this call + if (!isDirectAccess()) { + throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); + } + try { + HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); + try { + int code = response.getStatusLine().getStatusCode(); + switch (code) { + case 200: + logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + default: + logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + } + } finally { + EntityUtils.consume(response.getEntity()); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + + } + + // Delete all the cached aux files as well: + deleteAllAuxObjects(); + + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. 
+ */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + + @Override + public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); + } + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (getSizeFromGlobus() != -1); + } + + @Override + public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); + } + + @Override + public OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + @Override + public boolean downloadRedirectEnabled() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } + + public boolean downloadRedirectEnabled(String auxObjectTag) { + return baseStore.downloadRedirectEnabled(auxObjectTag); + } + + @Override + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { + + // ToDo - support remote auxiliary Files + if (auxiliaryTag == null) { + String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); + if (secretKey == null) { + return endpointWithBasePath + "/" + path; + } else { + return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); + } + } else { + return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); + } + } + + int getUrlExpirationMinutes() { + String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + logger.info("base-uri is " + endpointWithBasePath); + if (endpointWithBasePath == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + } else { + try { + new URI(endpointWithBasePath); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if(dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." + baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + + } else if (storageLocation != null) { + // ://// + //remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if(index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + //THe base store needs the baseStoreIdentifier and not the relative URL + fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" + + fullStorageLocation; + break; + default: + logger.warning("Not Implemented: RemoteOverlay store with base store type: " + + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); + throw new IOException("Not implemented"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + try { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch(MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + private String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if(driverIndex >=0) { + fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if(suffixIndex >=0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (this.getDvObject() instanceof Dataset) { + fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (this.getDvObject() instanceof DataFile) { + fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + }else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + + } + + protected static boolean isValidIdentifier(String driverId, String storageId) { + String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + try { + URI absoluteURI = new URI(baseUri + "/" + urlPath); + if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); + return false; + } + } catch(URISyntaxException use) { + logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); + logger.warning(use.getLocalizedMessage()); + return false; + } + return true; + } + + public static String getBaseStoreIdFor(String driverId) { + return System.getProperty("dataverse.files." + driverId + ".base-store"); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + public static void main(String[] args) { + System.out.println("Running the main method"); + if (args.length > 0) { + System.out.printf("List of arguments: {}", Arrays.toString(args)); + } + System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + logger.info(JvmSettings.BASE_URI.lookup("globus")); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 66c6a4cc2ee..ee2b6779cba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,7 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); private StorageIO baseStore = null; - private String urlPath = null; + private String path = null; private String baseUrl = null; private static HttpClientContext localContext = HttpClientContext.create(); @@ -83,10 +83,10 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) this.setIsLocalFile(false); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(urlPath); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); - logger.fine("Base URL: " + urlPath); + logger.fine("Base URL: " + path); } public 
RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -94,14 +94,14 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE this.setIsLocalFile(false); configureStores(null, driverId, storageLocation); - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(urlPath); - logger.fine("Base URL: " + urlPath); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Base URL: " + path); } - private void validatePath(String path) throws IOException { + private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + relPath); if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } @@ -182,7 +182,7 @@ public void open(DataAccessOption... options) throws IOException { private long getSizeFromHttpHeader() { long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + HttpHead head = new HttpHead(baseUrl + "/" + path); try { CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); @@ -224,12 +224,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -260,13 +260,13 @@ public void delete() throws IOException { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -275,7 +275,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -420,9 +420,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." 
+ this.driverId + ".secret-key"); if (secretKey == null) { - return baseUrl + "/" + urlPath; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 86130f5146e..4fb895f5adc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -47,6 +47,8 @@ public enum JvmSettings { // FILES SETTINGS SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), + FILES(SCOPE_FILES), + BASE_URI(FILES, "base-uri"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 2c4c927cc8f20d53ee1aaaf1979b793ee53f9b3f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 May 2023 14:13:02 -0400 Subject: [PATCH 004/546] add token --- .../dataaccess/GlobusOverlayAccessIO.java | 171 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 2 files changed, 111 insertions(+), 61 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index fe62e25ad6f..050b9ddc176 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.FileNotFoundException; import java.io.IOException; @@ -31,6 +32,7 @@ import java.util.logging.Logger; import org.apache.commons.lang3.NotImplementedException; +import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; @@ -49,6 +51,7 @@ import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; +import javax.json.JsonObject; import javax.net.ssl.SSLContext; /** @@ -58,8 +61,8 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: - * :///// + * StorageIdentifier format: :///// */ public class GlobusOverlayAccessIO extends StorageIO { @@ -68,6 +71,7 @@ public class GlobusOverlayAccessIO extends StorageIO { private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; + private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -86,7 +90,7 @@ public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Relative path: " + path); } @@ -99,18 +103,17 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Relative path: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new 
URI(endpointWithBasePath + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { + if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -181,37 +184,64 @@ public void open(DataAccessOption... options) throws IOException { } } + // Call the Globus API to get the file size private long getSizeFromGlobus() { - throw new NotImplementedException(); - /* - long size = -1; - HttpHead head = new HttpHead(endpointWithBasePath + "/" + path); + // Construct Globus URL + URI absoluteURI = null; try { - CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); - - try { - int code = response.getStatusLine().getStatusCode(); - logger.fine("Response for HEAD: " + code); - switch (code) { - case 200: - Header[] headers = response.getHeaders(HTTP.CONTENT_LEN); - logger.fine("Num headers: " + headers.length); - String sizeString = response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - logger.fine("Content-Length: " + sizeString); - size = Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - logger.fine("Found file size: " + size); - break; - default: - logger.warning("Response from " + head.getURI().toString() + " was " + code); - } - } finally { - EntityUtils.consume(response.getEntity()); + int filenameStart = path.lastIndexOf("/") + 1; + int pathStart = endpointWithBasePath.indexOf("/") + 1; + + String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + + path.substring(0, filenameStart); + String filename = path.substring(filenameStart); + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + HttpGet get = new HttpGet(absoluteURI); + String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + logger.info("Token is " + token); + get.addHeader("Authorization", "Bearer " + token); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + //Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + return (long) responseJson.getInt("size"); + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } + } catch (URISyntaxException e) { + // Should have been caught in validatePath + e.printStackTrace(); + } catch (ClientProtocolException e) { + // TODO Auto-generated catch block + e.printStackTrace(); } catch (IOException e) { - logger.warning(e.getMessage()); + // TODO Auto-generated catch block + e.printStackTrace(); } - return size; - */ + return -1; + + /* + * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + + * path); try { CloseableHttpResponse response = + * getSharedHttpClient().execute(head, localContext); + * + * try { int code = response.getStatusLine().getStatusCode(); + * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] + * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " + * + headers.length); String sizeString = + * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); + * logger.fine("Content-Length: " + sizeString); size = + * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); + * logger.fine("Found file size: " + size); break; default: + * logger.warning("Response from " + head.getURI().toString() + " was " + code); + * } } finally { EntityUtils.consume(response.getEntity()); } } catch + * (IOException e) { logger.warning(e.getMessage()); } return size; + */ } @Override @@ -360,8 +390,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -411,7 +442,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -469,9 +500,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = 
System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -486,8 +518,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -497,12 +529,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -513,8 +545,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -530,37 +562,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -622,11 +658,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUri = System.getProperty("dataverse.files." 
+ driverId + ".base-uri"); try { URI absoluteURI = new URI(baseUri + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUri)) { + if (!absoluteURI.normalize().toString().startsWith(baseUri)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); logger.warning(use.getLocalizedMessage()); return false; @@ -642,14 +678,27 @@ public static String getBaseStoreIdFor(String driverId) { public List cleanUp(Predicate filter, boolean dryRun) throws IOException { return baseStore.cleanUp(filter, dryRun); } - + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "12345/top"); + System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.base-store","file"); + System.setProperty("dataverse.files.file.type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + try { + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.getSizeFromGlobus()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 4fb895f5adc..eac8411c939 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -49,6 +49,7 @@ public enum JvmSettings { FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), BASE_URI(FILES, "base-uri"), + GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3c3378f5a3bf39eff13a582d0dc52a2a5549af8f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:25 -0400 Subject: [PATCH 005/546] start refactoring Globus bean --- .../dataaccess/GlobusOverlayAccessIO.java | 28 +++++++++----- .../iq/dataverse/globus/AccessToken.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 37 +++++++++++-------- 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 050b9ddc176..0d7c5458e14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.globus.AccessToken; +import 
edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -28,10 +30,8 @@ import java.util.Arrays; import java.util.List; import java.util.function.Predicate; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.commons.lang3.NotImplementedException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; @@ -83,6 +83,8 @@ public class GlobusOverlayAccessIO extends StorageIO { private static boolean trustCerts = false; private int httpConcurrency = 4; + private String globusAccessToken = null; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -190,18 +192,19 @@ private long getSizeFromGlobus() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; - int pathStart = endpointWithBasePath.indexOf("/") + 1; - - String directoryPath = (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + int pathStart = endpointWithBasePath.indexOf("/"); +logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); + logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + path + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - String token = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - logger.info("Token is " + token); - get.addHeader("Authorization", "Bearer " + token); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { //Get reponse as string @@ -482,6 +485,8 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { @@ -692,8 +697,11 @@ public static void main(String[] args) { DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); + + + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234//2791b83e-b989-47c5-a7fa-ce65fd949522/hdc1/image001.mrc", "globus"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); logger.info("Size is " + gsio.getSizeFromGlobus()); } catch (IOException e) { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 877fc68e4a1..c93e2c6aa94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -46,7 +46,7 @@ String getRefreshToken() { return refreshToken; } - ArrayList getOtherTokens() { + public ArrayList getOtherTokens() { return otherTokens; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9d80c5cc280..c2137dd1f47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -167,7 +167,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); + globusLogger.info("Start deleting permissions."); String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); @@ -264,15 +265,21 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo return task; } - public AccessToken getClientToken() throws MalformedURLException { - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - URL url = new URL( - "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + public static AccessToken getClientToken(String globusBasicToken) { + URL url; AccessToken clientTokenUser = null; - if (result.status == 200) { - clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + + try { + url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); + + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + } catch (MalformedURLException e) { + // On a statically defined URL... 
+ e.printStackTrace(); } return clientTokenUser; } @@ -306,7 +313,7 @@ public AccessToken getAccessToken(HttpServletRequest origRequest, String globusB } - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -359,7 +366,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode } - private StringBuilder readResultJson(InputStream in) { + private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; try { @@ -378,7 +385,7 @@ private StringBuilder readResultJson(InputStream in) { return sb; } - private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + private static T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { if (sb != null) { Gson gson = null; if (namingPolicy) { @@ -420,7 +427,7 @@ public String getDirectory(String datasetId) { } - class MakeRequestResponse { + static class MakeRequestResponse { public String jsonResponse; public int status; @@ -451,7 +458,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) if (globusEndpoint.equals("") || globusBasicToken.equals("")) { return false; } - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); if (clientTokenUser == null) { logger.severe("Cannot get client token "); return false; @@ -908,7 +915,7 @@ private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); - AccessToken clientTokenUser = getClientToken(); + AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); task = getTask(clientTokenUser, taskId, globusLogger); if (task != null) { From f14b75454a524fd8816d6f5367b0e15fbd0ded92 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 May 2023 14:53:56 -0400 Subject: [PATCH 006/546] enable globus store main() to run - will revert --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e5b191f0ed7..4926f59f8a0 100644 --- a/pom.xml +++ b/pom.xml @@ -184,7 +184,7 @@ org.glassfish jakarta.json - provided + From 502e660fe342939a617edd6d17a425c83b5a269b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:22:46 -0400 Subject: [PATCH 007/546] suppress thumb generation after a failure --- .../edu/harvard/iq/dataverse/DvObject.java | 14 +++++ .../dataaccess/ImageThumbConverter.java | 55 ++++++++++++------- .../dataverse/ingest/IngestServiceBean.java | 4 +- .../V5.13.0.1__9506-track-thumb-failures.sql | 1 + 4 files changed, 54 insertions(+), 20 deletions(-) create mode 100644 src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..6cb3816e3f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -181,7 +181,20 @@ public boolean isPreviewImageAvailable() { public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } + + 
/** Indicates whether a previous attempt to generate a preview image has failed, regardless of size. + * If so, we won't want to try again every time the preview/thumbnail is requested for a view. + */ + private boolean previewsHaveFailed; + + public boolean isPreviewsHaveFailed() { + return previewsHaveFailed; + } + public void setPreviewsHaveFailed(boolean previewsHaveFailed) { + this.previewsHaveFailed = previewsHaveFailed; + } + public Timestamp getModificationTime() { return modificationTime; } @@ -462,6 +475,7 @@ public void setStorageIdentifier(String storageIdentifier) { */ public abstract boolean isAncestorOf( DvObject other ); + @OneToMany(mappedBy = "definitionPoint",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true) List roleAssignments; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 2b4aed3a9a5..eb08646454d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -48,6 +48,7 @@ import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; //import org.primefaces.util.Base64; @@ -110,15 +111,24 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (isThumbnailCached(storageIO, size)) { + logger.fine("Found cached thumbnail for " + file.getId()); return true; } - logger.fine("Checking for thumbnail, file type: " + file.getContentType()); - - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - return generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - return generatePDFThumbnail(storageIO, size); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + // Don't try to generate if there have been failures: + if (!file.isPreviewsHaveFailed()) { + boolean thumbnailGenerated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + thumbnailGenerated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + thumbnailGenerated = generatePDFThumbnail(storageIO, size); + } + if (!thumbnailGenerated) { + logger.fine("No thumbnail generated for " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } + return thumbnailGenerated; } return false; @@ -436,20 +446,27 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (cachedThumbnailChannel == null) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); - // try to generate, if not available: - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } + // try to generate, if not available and hasn't failed before + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); + if (!file.isPreviewsHaveFailed()) { + boolean generated = false; + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + generated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + generated = generatePDFThumbnail(storageIO, size); + } - if (generated) { - // try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; + if (!generated) { + // Record failure + logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); + file.setPreviewGenerationHasPreviouslyFailed(true); + } else { + // Success - try to open again: + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5a353453fe8..fbe2d7b38ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -292,7 +292,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } catch (IOException ioex) { logger.warning("Failed to save generated file " + generated.toString()); - } + //Shouldn't mark this file as having a preview after this. + dataFile.setPreviewImageAvailable(false); + } } // ... but we definitely want to delete it: diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql new file mode 100644 index 00000000000..9b12d27db91 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql @@ -0,0 +1 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file From 0fea5ccca11b2348429ddfee75e4bafc709c7473 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 12 May 2023 13:25:38 -0400 Subject: [PATCH 008/546] refactor error --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index eb08646454d..254c334d655 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +459,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewGenerationHasPreviouslyFailed(true); + file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 8f5350ae0df4df60c55ff770259531935cb6ac9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 10:32:21 -0400 Subject: [PATCH 009/546] 
cache isThumb available --- .../iq/dataverse/ThumbnailServiceWrapper.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 6c8db8c124b..e2bb21c8a4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -49,6 +49,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectThumbnailsMap = new HashMap<>(); private Map dvobjectViewMap = new HashMap<>(); + private Map hasThumbMap = new HashMap<>(); private String getAssignedDatasetImage(Dataset dataset, int size) { if (dataset == null) { @@ -133,7 +134,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { if ((!((DataFile)result.getEntity()).isRestricted() || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) - && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) { + && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( (DataFile) result.getEntity(), @@ -159,6 +160,13 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { return null; } + public boolean isThumbnailAvailable(DataFile entity) { + if(!hasThumbMap.containsKey(entity.getId())) { + hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity)); + } + return hasThumbMap.get(entity.getId()); + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { @@ -295,7 +303,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } } - if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) { + if (isThumbnailAvailable(thumbnailImageFile)) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( thumbnailImageFile, size); @@ -323,6 +331,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { public void resetObjectMaps() { dvobjectThumbnailsMap = new HashMap<>(); dvobjectViewMap = new HashMap<>(); + hasThumbMap = new HashMap<>(); } From 8604eef7f470eade8dbf885ed42bc47407db74ff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:22:18 -0400 Subject: [PATCH 010/546] set thumb fail column --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 5 ++++- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..a5822828682 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1127,7 +1127,7 @@ public boolean isThumbnailAvailable (DataFile file) { } // If thumbnails are not even supported for this class of files, - // there's notthing to talk about: + // there's nothing to talk about: if (!FileUtil.isThumbnailSupported(file)) { return false; } @@ -1149,6 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); } return false; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 254c334d655..ab9294eea72 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -115,7 +115,7 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s return true; } - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + "to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { boolean thumbnailGenerated = false; From aeae8f4ddbb05794c177e9b1d33725e1ed7d7e2f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 15 May 2023 13:50:49 -0400 Subject: [PATCH 011/546] use thumb wrapper in edit and view files --- src/main/webapp/editFilesFragment.xhtml | 4 ++-- src/main/webapp/file-info-fragment.xhtml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a4e635b8c14..af06b44e3bc 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -360,13 +360,13 @@
- - + #{fileMetadata.label} diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml index 33a8d2c3ca5..3e8e80d51e7 100644 --- a/src/main/webapp/file-info-fragment.xhtml +++ b/src/main/webapp/file-info-fragment.xhtml @@ -28,8 +28,8 @@
- - + From c4ad20bc4b67b93908e60b76a251240f4a6e2540 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 13:49:35 -0400 Subject: [PATCH 012/546] add api --- .../edu/harvard/iq/dataverse/api/Admin.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d219339add9..14c556e9caa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2321,4 +2321,26 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } + @DELETE + @Path("/clearThumbnailFailureFlag") + public Response clearThumbnailFailureFlag() { + em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + return ok("Thumnail Failure Flags cleared."); + } + + @DELETE + @Path("/clearThumbnailFailureFlag/{id}") + public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { + try { + DataFile df = findDataFileOrDie(fileId); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + deleteQuery.setParameter(1, df.getId()); + deleteQuery.executeUpdate(); + return ok("Thumnail Failure Flag cleared for file id=: " + df.getId() + "."); + } catch (WrappedResponse r) { + logger.info("Could not find file with the id: " + fileId); + return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); + } + } + } From 63e98b3b60a4baae98f1f88a282b97694929c443 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:16:47 -0400 Subject: [PATCH 013/546] make clearer --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index a5822828682..f41565c9449 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1149,11 +1149,9 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); } - + file.setPreviewsHaveFailed(true); + this.save(file); return false; } From 2671cb75effb5425d02b3e874c7525b7833dc533 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:25:58 -0400 Subject: [PATCH 014/546] update comment --- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 6cb3816e3f1..87619450133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -182,8 +182,11 @@ public void setPreviewImageAvailable(boolean status) { this.previewImageAvailable = status; } - /** Indicates whether a previous attempt to generate a preview image has failed, regardless of size. - * If so, we won't want to try again every time the preview/thumbnail is requested for a view. + /** + * Indicates whether a previous attempt to generate a preview image has failed, + * regardless of size. 
This could be due to the file not being accessible, or a + * real failure in generating the thumbnail. In both cases, we won't want to try + * again every time the preview/thumbnail is requested for a view. */ private boolean previewsHaveFailed; From 19db99b1427700c9cc4ad462c0edd017e6dd5799 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 17 May 2023 14:26:28 -0400 Subject: [PATCH 015/546] remove setting flag where datafile is not clearly being saved to db --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index ab9294eea72..921faba7989 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -126,7 +126,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } if (!thumbnailGenerated) { logger.fine("No thumbnail generated for " + file.getId()); - file.setPreviewsHaveFailed(true); } return thumbnailGenerated; } @@ -459,7 +458,6 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (!generated) { // Record failure logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - file.setPreviewsHaveFailed(true); } else { // Success - try to open again: try { From 156d025970eeb5223b6fd8343db09cafee057fed Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 016/546] fix non-merge-able error when recording thumb fail --- .../iq/dataverse/DataFileServiceBean.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index f41565c9449..880b2ea7dc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1142,17 +1142,17 @@ public boolean isThumbnailAvailable (DataFile file) { is more important... 
*/ - - if (ImageThumbConverter.isThumbnailAvailable(file)) { - file = this.find(file.getId()); - file.setPreviewImageAvailable(true); - this.save(file); - return true; - } - file.setPreviewsHaveFailed(true); - this.save(file); - return false; + file = this.find(file.getId()); + if (ImageThumbConverter.isThumbnailAvailable(file)) { + file.setPreviewImageAvailable(true); + this.save(file); + return true; + } else { + file.setPreviewsHaveFailed(true); + this.save(file); + return false; + } } From 97aa46cb3e9bd2d424961e68e9d024216740c57f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 16:50:38 -0400 Subject: [PATCH 017/546] rename script --- ...humb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.1__9506-track-thumb-failures.sql => V5.13.0.2__9506-track-thumb-failures.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.1__9506-track-thumb-failures.sql rename to src/main/resources/db/migration/V5.13.0.2__9506-track-thumb-failures.sql From dbc36c9d938571a5b61156611c445d266fbafe76 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 13 Jun 2023 17:06:19 -0400 Subject: [PATCH 018/546] refactor - remove duplicate code --- .../dataaccess/ImageThumbConverter.java | 29 ++++++------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 921faba7989..fb0785ffd7b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,7 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } + return generateThumbnail(storageIO, size); + } + + private static boolean generateThumbnail(StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -131,7 +135,6 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } return false; - } // Note that this method works on ALL file types for which thumbnail @@ -446,25 +449,11 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + "to generate base64 thumbnail, file id: " + file.getId()); - if (!file.isPreviewsHaveFailed()) { - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } - - if (!generated) { - // Record failure - logger.fine("Failed to generate base64 thumbnail for file id: " + file.getId()); - } else { - // Success - try to open again: - try { - cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); - } catch (Exception ioEx) { - cachedThumbnailChannel = null; - } + if(generateThumbnail(storageIO, size)) { + try { + cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); + } catch (Exception ioEx) { + cachedThumbnailChannel = null; } } From 0c8972304a43c25ed1de1c5cc6cc1c09ef419948 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 10:30:05 -0400 Subject: [PATCH 019/546] try ds logos as url requests --- .../iq/dataverse/ThumbnailServiceWrapper.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index e2bb21c8a4c..66f79472178 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -12,7 +13,8 @@ import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.File; +import edu.harvard.iq.dataverse.util.SystemConfig; + import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -21,6 +23,8 @@ import java.util.Base64; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; + import javax.ejb.EJB; import javax.enterprise.context.RequestScoped; import javax.faces.view.ViewScoped; @@ -36,6 +40,9 @@ @RequestScoped @Named public class ThumbnailServiceWrapper implements java.io.Serializable { + + private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); + @Inject PermissionsWrapper permissionsWrapper; @EJB @@ -214,7 +221,13 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + + String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + logger.fine("getDatasetCardImageAsBase64Url: " + url); + this.dvobjectThumbnailsMap.put(datasetId,url); + return url; +/* String cardImageUrl = null; StorageIO dataAccess = null; @@ -320,6 +333,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); return cardImageUrl; + */ } // it's the responsibility of the user - to make sure the search result From dc4b6ae5201af228b1b484c6dd430713f8728ccc Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 
17:19:41 -0400 Subject: [PATCH 020/546] set the datasetid for search cards --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 66f79472178..4c3778527d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -194,6 +194,7 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { return null; } Dataset dataset = (Dataset)result.getEntity(); + dataset.setId(result.getEntityId()); Long versionId = result.getDatasetVersionId(); From 546cfdf2048158320e76a9345e9ebc3caf7ca6c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 1 Jun 2023 15:09:25 -0400 Subject: [PATCH 021/546] fix non-merge-able error when recording thumb fail --- .../java/edu/harvard/iq/dataverse/DataFileServiceBean.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 880b2ea7dc4..ec12480d28d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1148,11 +1148,10 @@ public boolean isThumbnailAvailable (DataFile file) { file.setPreviewImageAvailable(true); this.save(file); return true; - } else { - file.setPreviewsHaveFailed(true); - this.save(file); - return false; } + file.setPreviewsHaveFailed(true); + this.save(file); + return false; } From d3a48dffdfaa56bba065b3c36a2b6469e4227c33 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 14 Jun 2023 17:44:02 -0400 Subject: [PATCH 022/546] typo --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 4c3778527d7..8dda91fd6a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -223,7 +223,7 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo return null; } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/datasets/" + dataset.getId() + "/logo"; + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; From f505428f12a5ead774642837bdb871deda34ee27 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 13:13:01 -0400 Subject: [PATCH 023/546] only send url if thumb should exist --- .../iq/dataverse/ThumbnailServiceWrapper.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 8dda91fd6a3..19c53ffa77e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.api.Datasets; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import 
edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataset.DatasetUtil; import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; @@ -222,6 +223,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); + return null; + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); From 2d177a60fe67df26bafad35cf237e048a21545ee Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:08:15 -0400 Subject: [PATCH 024/546] use inputStream.transferTo --- .../dataaccess/ImageThumbConverter.java | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index fb0785ffd7b..bd87c5541a5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -223,30 +223,32 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } if (tempFilesRequired) { - ReadableByteChannel pdfFileChannel; - + //ReadableByteChannel pdfFileChannel; + InputStream inputStream = null; try { storageIO.open(); - //inputStream = storageIO.getInputStream(); - pdfFileChannel = storageIO.getReadChannel(); + inputStream = storageIO.getInputStream(); + //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; } File tempFile; - FileChannel tempFileChannel = null; + OutputStream outputStream = null; + //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); - tempFileChannel = new FileOutputStream(tempFile).getChannel(); + outputStream = new FileOutputStream(tempFile); + inputStream.transferTo(outputStream); - tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; } finally { - IOUtils.closeQuietly(tempFileChannel); - IOUtils.closeQuietly(pdfFileChannel); + IOUtils.closeQuietly(inputStream); + IOUtils.closeQuietly(outputStream); } sourcePdfFile = tempFile; } From 6540b5da0966addffa3a0a6a9d7e67735f89e237 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 15:42:29 -0400 Subject: [PATCH 025/546] add debug --- .../harvard/iq/dataverse/dataaccess/ImageThumbConverter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git 
a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index bd87c5541a5..4a2b8ea0e6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -240,7 +240,8 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - inputStream.transferTo(outputStream); + long sz = inputStream.transferTo(outputStream); + logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { @@ -763,7 +764,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) try { fileSize = new File(fileLocation).length(); } catch (Exception ex) { - // + logger.warning("Can't open file: " + fileLocation); } if (fileSize == 0 || fileSize > sizeLimit) { From e202d0abc7395fe85218745510b32ade9b6ca770 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 19 Jun 2023 16:15:58 -0400 Subject: [PATCH 026/546] more debug --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 4a2b8ea0e6d..3033269f3bc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,6 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { + logger.info("Couldn't find IM"); return false; } @@ -218,12 +219,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFilesRequired = true; } catch (IOException ioex) { + logger.warning(ioex.getMessage()); + ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { //ReadableByteChannel pdfFileChannel; + logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); @@ -241,7 +245,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); long sz = inputStream.transferTo(outputStream); - logger.info(" wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); + logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); } catch (IOException ioex) { From b9cd2bbf0c42fb4e7aada29d7cea817c195ca75d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:05 -0400 Subject: [PATCH 027/546] include failed preview flag in queries --- .../edu/harvard/iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..0bd0a01aef1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -762,7 +762,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -796,7 +796,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewshavefailed = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 'application/pdf' " From ac5a9564848ba241a993e8e9252641820e9041b4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:22:59 -0400 Subject: [PATCH 028/546] use getThumbnailByVersionId --- .../iq/dataverse/ThumbnailServiceWrapper.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 19c53ffa77e..ff5e510e82c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -226,23 +226,20 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo DataFile thumbnailFile = dataset.getThumbnailFile(); if (thumbnailFile == null) { - thumbnailFile = DatasetUtil.attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); - if (thumbnailFile == null) { - logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo available that could be selected automatically."); - return null; - } - } - if (thumbnailFile.isRestricted()) { - logger.fine("Dataset (id :" + dataset.getId() + ") has a logo the user selected but the file must have later been restricted. 
Returning null."); - return null; + + // We attempt to auto-select via the optimized, native query-based method + // from the DatasetVersionService: + if (datasetVersionService.getThumbnailByVersionId(versionId) == null) { + return null; + } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsBase64Url: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; + /* String cardImageUrl = null; StorageIO dataAccess = null; From 98acd6b50af770779329de1201663d8599edf16a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 20 Jun 2023 10:49:24 -0400 Subject: [PATCH 029/546] cleanup --- .../dataverse/dataaccess/ImageThumbConverter.java | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 3033269f3bc..458b8da227b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -196,7 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { - logger.info("Couldn't find IM"); + logger.fine("Couldn't find ImageMagick"); return false; } @@ -220,19 +220,15 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s } catch (IOException ioex) { logger.warning(ioex.getMessage()); - ioex.printStackTrace(); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { - //ReadableByteChannel pdfFileChannel; - logger.info("Creating temp file"); InputStream inputStream = null; try { storageIO.open(); inputStream = storageIO.getInputStream(); - //pdfFileChannel = storageIO.getReadChannel(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; @@ -240,14 +236,11 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s File tempFile; OutputStream outputStream = null; - //FileChannel tempFileChannel = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); outputStream = new FileOutputStream(tempFile); - long sz = inputStream.transferTo(outputStream); - logger.info("Wrote " + sz + " bytes to " + tempFile.getAbsolutePath()); - - //tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + //Reads/transfers all bytes from the input stream to the output stream. 
+ inputStream.transferTo(outputStream); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; From 610c65dc9ddd403041ee95475810db2977e57623 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 12:56:13 -0400 Subject: [PATCH 030/546] rename and cleanup --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../iq/dataverse/DataverseServiceBean.java | 45 ------- .../iq/dataverse/ThumbnailServiceWrapper.java | 117 +----------------- .../search/SearchIncludeFragment.java | 2 +- 4 files changed, 6 insertions(+), 160 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3d608153ba3..2ca1fb825f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -483,7 +483,7 @@ public String getThumbnailString() { thumbnailString = datasetThumbnail.getBase64image(); } else { - thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset, + thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset, workingVersion.getId(), !workingVersion.isDraft(), ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index e092f209acd..e99458fbc9d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -346,51 +346,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - - /* - public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) { - if (dataverse == null) { - return false; - } - - // First, check if the dataverse has a defined logo: - - //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) { - File dataverseLogoFile = getLogo(dataverse); - if (dataverseLogoFile != null) { - String logoThumbNailPath = null; - - if (dataverseLogoFile.exists()) { - logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48); - if (logoThumbNailPath != null) { - return true; - } - } - } - //} - */ - // If there's no uploaded logo for this dataverse, go through its - // [released] datasets and see if any of them have card images: - // - // TODO: - // Discuss/Decide if we really want to do this - i.e., go through every - // file in every dataset below... - // -- L.A. 
4.0 beta14 - /* - for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) { - if (dataset != null) { - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - - if (releasedVersion != null) { - if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) { - return true; - } - } - } - } */ - /* - return false; - } */ private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index ff5e510e82c..c75c29ea094 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -177,7 +177,7 @@ public boolean isThumbnailAvailable(DataFile entity) { // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! - public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { + public String getDatasetCardImageAsUrl(SolrSearchResult result) { // Before we do anything else, check if it's a harvested dataset; // no need to check anything else if so (harvested datasets never have // thumbnails) @@ -199,10 +199,10 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { Long versionId = result.getDatasetVersionId(); - return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } - public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) { + public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) { Long datasetId = dataset.getId(); if (datasetId != null) { if (this.dvobjectThumbnailsMap.containsKey(datasetId)) { @@ -235,118 +235,9 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo } String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; - logger.fine("getDatasetCardImageAsBase64Url: " + url); + logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; - - -/* - String cardImageUrl = null; - StorageIO dataAccess = null; - - try{ - dataAccess = DataAccess.getStorageIO(dataset); - } - catch(IOException ioex){ - // ignore - } - - InputStream in = null; - // See if the dataset already has a dedicated thumbnail ("logo") saved as - // an auxilary file on the dataset level: - // (don't bother checking if it exists; just try to open the input stream) - try { - in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size); - //thumb48addedByImageThumbConverter); - } catch (Exception ioex) { - //ignore - } - - if (in != null) { - try { - byte[] bytes = IOUtils.toByteArray(in); - String base64image = Base64.getEncoder().encodeToString(bytes); - cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image; - this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl); - return cardImageUrl; - } catch (IOException ex) { - this.dvobjectThumbnailsMap.put(datasetId, ""); - return null; - // (alternatively, we could ignore the exception, and proceed with the - // regular process of selecting the thumbnail from the available - // image files - ?) 
- } finally - { - IOUtils.closeQuietly(in); - } - } - - // If not, see if the dataset has one of its image files already assigned - // to be the designated thumbnail: - cardImageUrl = this.getAssignedDatasetImage(dataset, size); - - if (cardImageUrl != null) { - //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl); - return cardImageUrl; - } - - // And finally, try to auto-select the thumbnail (unless instructed not to): - - if (!autoselect) { - return null; - } - - // We attempt to auto-select via the optimized, native query-based method - // from the DatasetVersionService: - Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId); - - if (thumbnailImageFileId != null) { - //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId; - if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+datasetId); - if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) { - return this.dvobjectThumbnailsMap.get(thumbnailImageFileId); - } - return null; - } - - DataFile thumbnailImageFile = null; - - if (dvobjectViewMap.containsKey(thumbnailImageFileId) - && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) { - thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId); - } else { - thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId); - if (thumbnailImageFile != null) { - // TODO: - // do we need this file on the map? - it may not even produce - // a thumbnail! - dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - return null; - } - } - - if (isThumbnailAvailable(thumbnailImageFile)) { - cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( - thumbnailImageFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } - - if (cardImageUrl != null) { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - } - } - - //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); - - return cardImageUrl; - */ } // it's the responsibility of the user - to make sure the search result diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index bfe397cf48c..99fe4cd979b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1302,7 +1302,7 @@ public void setDisplayCardValues() { result.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(result)); } else if (result.getType().equals("datasets")) { if (result.getEntity() != null) { - result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(result)); + result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsUrl(result)); } if (result.isHarvested()) { From 391504de43d8992e4b97d506fdfc763e512a8fc4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:46:35 -0400 Subject: [PATCH 031/546] api docs --- doc/sphinx-guides/source/api/native-api.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index b39cf91337a..24f6c0d4ced 100644 
--- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -4649,3 +4649,23 @@ A curl example using an ``ID`` curl -X POST -H 'Content-Type:application/json' -d "$JSON" $SERVER_URL/api/admin/feedback Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. + +.. _thumbnail_reset: + +Reset Thumbnail Failure Flags +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If Dataverse attempts to create a thumbnail image for an image or pdf file and the attempt fails, Dataverse will set a flag for the file to avoid repeated attempts to generate the thumbnail. +For cases where the problem may have been temporary (or fixed in a later Dataverse release), two API calls exist to reset this flag for all files or for a given file. + +Curl examples + +.. code-block:: bash + + export SERVER_URL=http://localhost + export fileID=1234 + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag + + curl -X DELETE $SERVER_URL/api/admin/clearThumbnailFailureFlag/$fileID + From de7963a0635646f6c00e1362fc87152029394839 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 21 Jun 2023 13:53:30 -0400 Subject: [PATCH 032/546] refactor typo --- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 458b8da227b..febf659b71a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -114,11 +114,11 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s logger.fine("Found cached thumbnail for " + file.getId()); return true; } - return generateThumbnail(storageIO, size); + return generateThumbnail(file, storageIO, size); } - private static boolean generateThumbnail(StorageIO storageIO, int size) { + private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: if (!file.isPreviewsHaveFailed()) { @@ -449,7 +449,7 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); // try to generate, if not available and hasn't failed before - if(generateThumbnail(storageIO, size)) { + if(generateThumbnail(file, storageIO, size)) { try { cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); } catch (Exception ioEx) { From e73806a6907ec630d7b2389abda632727821f48e Mon Sep 17 00:00:00 2001 From: lubitchv Date: Thu, 27 Jul 2023 17:25:40 -0400 Subject: [PATCH 033/546] increase universe --- .../db/migration/V5.13.0.3__9728-universe-variablemetadata.sql | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql new file mode 100644 index 00000000000..8e311c06b32 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql @@ -0,0 +1,2 @@ +-- increase field universe from 255 to text +ALTER TABLE variablemetadata ALTER COLUMN universe TYPE text; From 495594a2ed039b52951b7f1298426436b64a00f4 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 28 Jul 2023 10:50:22 -0400 Subject: [PATCH 034/546] column text --- .../edu/harvard/iq/dataverse/datavariable/VariableMetadata.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java index c18355c9979..08fcd14e0e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java @@ -71,6 +71,7 @@ public class VariableMetadata implements Serializable { /** * universe: metadata variable field. */ + @Column(columnDefinition="TEXT") private String universe; /** From be56f48f469ce319c1e3cacc4e14e5bbb9c0ecb9 Mon Sep 17 00:00:00 2001 From: lubitchv Date: Fri, 28 Jul 2023 11:36:23 -0400 Subject: [PATCH 035/546] release note --- doc/release-notes/9728-universe-variablemetadata.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9728-universe-variablemetadata.md diff --git a/doc/release-notes/9728-universe-variablemetadata.md b/doc/release-notes/9728-universe-variablemetadata.md new file mode 100644 index 00000000000..66a2daf151b --- /dev/null +++ b/doc/release-notes/9728-universe-variablemetadata.md @@ -0,0 +1 @@ +universe field in variablemetadata table was changed from varchar(255) to text. The change was made to support longer strings in "universe" metadata field, similar to the rest of text fields in variablemetadata table. 
From 36d26d4b0ef9185869a006d78ca3be371dc19112 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 2 Aug 2023 11:52:39 -0400 Subject: [PATCH 036/546] update test cred --- .../harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0d7c5458e14..081c5a622aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -691,7 +691,7 @@ public static void main(String[] args) { } System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOlprRmxGejNTWDlkTVpUNk92ZmVJaFQyTWY0SDd4cXBoTDNSS29vUmRGVlE9"); + System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); System.setProperty("dataverse.files.globus.base-store","file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); From 4b755b50bfbe729570dde943c1809ef80b3b840f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 18 Aug 2023 17:25:52 -0400 Subject: [PATCH 037/546] setting is GlobusAppUrl not ...URL --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8493702406b..a5579c82c6d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3926,7 +3926,7 @@ GlobusEndpoint is Globus endpoint id used with Globus integration. See :ref:`glo A comma-separated list of the S3 stores that are configured to support Globus integration. See :ref:`globus-support` for details. -:GlobusAppURL +:GlobusAppUrl +++++++++++++ The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. From 4e6d948d712da42862b9f429d8ef65086a71baab Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 25 Aug 2023 10:00:30 -0400 Subject: [PATCH 038/546] remove req. 
that app and DV are on same host, note future todo --- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 11 +++-------- .../iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index dcd7eacf50b..b8165f0314f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3329,8 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo, - @Context HttpHeaders headers + @Context UriInfo uriInfo ) throws IOException, ExecutionException, InterruptedException { logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); @@ -3390,12 +3389,8 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } - String requestUrl = headers.getRequestHeader("origin").get(0); - - if(requestUrl.contains("localhost")){ - requestUrl = "http://localhost:8080"; - } - + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + // Async Call globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index c2137dd1f47..5c387710844 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -737,6 +737,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); + //ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of calling API + String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { // if(!taskSkippedFiles) From b5e47b98a08f25c1160fc651b84bc1fbefe3dfa4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:33 -0400 Subject: [PATCH 039/546] fix retrieveSize parsing, refactoring --- .../dataaccess/GlobusOverlayAccessIO.java | 169 ++++++------------ .../dataaccess/RemoteOverlayAccessIO.java | 82 ++++----- 2 files changed, 93 insertions(+), 158 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 081c5a622aa..6a22f8b68f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -4,14 +4,12 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -56,7 +54,6 @@ /** * 
@author qqmyers - * @param what it stores */ /* * Globus Overlay Driver @@ -64,14 +61,13 @@ * StorageIdentifier format: :///// */ -public class GlobusOverlayAccessIO extends StorageIO { +public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); private StorageIO baseStore = null; private String path = null; private String endpointWithBasePath = null; - private String globusToken = null; private static HttpClientContext localContext = HttpClientContext.create(); private PoolingHttpClientConnectionManager cm = null; @@ -117,103 +113,37 @@ private void validatePath(String relPath) throws IOException { } } - @Override - public void open(DataAccessOption... options) throws IOException { - - baseStore.open(options); - - DataAccessRequest req = this.getRequest(); - - if (isWriteAccessRequested(options)) { - isWriteAccess = true; - isReadAccess = false; - } else { - isWriteAccess = false; - isReadAccess = true; - } - - if (dvObject instanceof DataFile) { - String storageIdentifier = dvObject.getStorageIdentifier(); - - DataFile dataFile = this.getDataFile(); - - if (req != null && req.getParameter("noVarHeader") != null) { - baseStore.setNoVarHeader(true); - } - - if (storageIdentifier == null || "".equals(storageIdentifier)) { - throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); - } - - // Fix new DataFiles: DataFiles that have not yet been saved may use this method - // when they don't have their storageidentifier in the final form - // So we fix it up here. ToDo: refactor so that storageidentifier is generated - // by the appropriate StorageIO class and is final from the start. - logger.fine("StorageIdentifier is: " + storageIdentifier); - - if (isReadAccess) { - if (dataFile.getFilesize() >= 0) { - this.setSize(dataFile.getFilesize()); - } else { - logger.fine("Setting size"); - this.setSize(getSizeFromGlobus()); - } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { - - List datavariables = dataFile.getDataTable().getDataVariables(); - String varHeaderLine = generateVariableHeader(datavariables); - this.setVarHeader(varHeaderLine); - } - - } - - this.setMimeType(dataFile.getContentType()); - - try { - this.setFileName(dataFile.getFileMetadata().getLabel()); - } catch (Exception ex) { - this.setFileName("unknown"); - } - } else if (dvObject instanceof Dataset) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else if (dvObject instanceof Dataverse) { - throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromGlobus()); - } - } - // Call the Globus API to get the file size - private long getSizeFromGlobus() { + @Override + long retrieveSize() { // Construct Globus URL URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; int pathStart = endpointWithBasePath.indexOf("/"); -logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.info("endpointWithBasePath: " + endpointWithBasePath); String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart) : "") + path.substring(0, filenameStart); logger.info("directoryPath: " + directoryPath); String filename = path.substring(filenameStart); String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; - absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + + "/ls?path=" + directoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - + logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { - //Get reponse as string + // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getInt("size"); + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { - logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); logger.info(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { @@ -227,24 +157,6 @@ private long getSizeFromGlobus() { e.printStackTrace(); } return -1; - - /* - * long size = -1; HttpHead head = new HttpHead(endpointWithBasePath + "/" + - * path); try { CloseableHttpResponse response = - * getSharedHttpClient().execute(head, localContext); - * - * try { int code = response.getStatusLine().getStatusCode(); - * logger.fine("Response for HEAD: " + code); switch (code) { case 200: Header[] - * headers = response.getHeaders(HTTP.CONTENT_LEN); logger.fine("Num headers: " - * + headers.length); String sizeString = - * response.getHeaders(HTTP.CONTENT_LEN)[0].getValue(); - * logger.fine("Content-Length: " + sizeString); size = - * Long.parseLong(response.getHeaders(HTTP.CONTENT_LEN)[0].getValue()); - * logger.fine("Found file size: " + size); break; default: - * logger.warning("Response from " + head.getURI().toString() + " was " + code); - * } } finally { EntityUtils.consume(response.getEntity()); } } catch - * (IOException e) { logger.warning(e.getMessage()); } return size; - */ } @Override @@ -417,7 +329,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromGlobus() != -1); + return (retrieveSize() != -1); } @Override @@ -485,9 +397,12 @@ int getUrlExpirationMinutes() { } private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - AccessToken accessToken = GlobusServiceBean.getClientToken(JvmSettings.GLOBUS_TOKEN.lookup(driverId)); + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." 
+ this.driverId + ".globus-token"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); + endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); logger.info("base-uri is " + endpointWithBasePath); if (endpointWithBasePath == null) { throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); @@ -527,7 +442,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -554,7 +469,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Implemented: GlobusOverlay store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); throw new IOException("Not implemented"); } @@ -640,21 +555,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); } @@ -689,24 +604,42 @@ public static void main(String[] args) { if (args.length > 0) { System.out.printf("List of arguments: {}", Arrays.toString(args)); } - System.setProperty("dataverse.files.globus.base-uri", "2791b83e-b989-47c5-a7fa-ce65fd949522"); + // System.setProperty("dataverse.files.globus.globus_client_id", + // "2791b83e-b989-47c5-a7fa-ce65fd949522"); + System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOmtsa1RZc242bU1oRXNuUFFwQy9oSzQxSi9EMDV6SjRtUDd1c0ZiN011MEk9"); - System.setProperty("dataverse.files.globus.base-store","file"); - System.setProperty("dataverse.files.file.type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + // 
System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); + System.setProperty("dataverse.files.globus.globus-token", + "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); + System.setProperty("dataverse.files.globus.base-store", "file"); + System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); logger.info(JvmSettings.BASE_URI.lookup("globus")); - - - + logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO("globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.getSizeFromGlobus()); - + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( + "globus://1234///hdc1/image001.mrc", "globus"); + logger.info("Size is " + gsio.retrieveSize()); + } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } + try { + DataFile df = new DataFile(); + Dataset ds = new Dataset(); + ds.setAuthority("10.5072"); + ds.setIdentifier("FK21234"); + df.setOwner(ds); + df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); + GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); + logger.info("Size2 is " + gsio.retrieveSize()); + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index ee2b6779cba..710d7a38fb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -85,7 +85,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - + logger.fine("Base URL: " + path); } @@ -98,18 +98,17 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE validatePath(path); logger.fine("Base URL: " + path); } - + private void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -150,7 +149,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); + this.setSize(retrieveSize()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -171,16 +170,14 @@ public void open(DataAccessOption... 
options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } } - private long getSizeFromHttpHeader() { + long retrieveSize() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -356,8 +353,9 @@ public String getStorageLocation() throws IOException { String fullStorageLocation = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStorageLocation); int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } if (this.getDvObject() instanceof Dataset) { throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); @@ -379,7 +377,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); + return (retrieveSize() != -1); } @Override @@ -407,7 +405,7 @@ public boolean downloadRedirectEnabled() { } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -422,8 +420,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -464,9 +461,10 @@ private void configureStores(DataAccessRequest req, String driverId, String stor if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { + String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); } else { if (this.getDvObject() != null) { @@ -481,8 +479,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." 
+ baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -492,12 +490,12 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } else if (storageLocation != null) { // ://// - //remoteDriverId:// is removed if coming through directStorageIO + // remoteDriverId:// is removed if coming through directStorageIO int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { + if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - //THe base store needs the baseStoreIdentifier and not the relative URL + // THe base store needs the baseStoreIdentifier and not the relative URL fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); switch (baseDriverType) { @@ -508,8 +506,8 @@ private void configureStores(DataAccessRequest req, String driverId, String stor break; case DataAccess.FILE: fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; + + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + + "/" + fullStorageLocation; break; default: logger.warning("Not Implemented: RemoteOverlay store with base store type: " @@ -525,37 +523,41 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { + remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); + } catch (MalformedURLException mfue) { logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); } } - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. + // Convenience method to assemble the path, starting with the DOI + // authority/identifier/, that is needed to create a base store via + // DataAccess.getDirectStorageIO - the caller has to add the store type specific + // prefix required. 
private String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); } int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); } if (this.getDvObject() instanceof Dataset) { fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; } else if (this.getDvObject() instanceof DataFile) { fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { + + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); } logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } - + public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { try { @@ -617,11 +619,11 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); try { URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId); return false; } From cec0b519948d8ba480f49f915dabd5f31e5c5082 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 29 Aug 2023 12:52:48 -0400 Subject: [PATCH 040/546] add globus type --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d046fa4661d..f2eb0236df4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -48,6 +48,7 @@ public DataAccess() { public static final String S3 = "s3"; static final String SWIFT = "swift"; static final String REMOTE = "remote"; + static final String GLOBUS = "globus"; static final String TMP = "tmp"; public static final String SEPARATOR = "://"; //Default to "file" is for tests only @@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data return new SwiftAccessIO<>(dvObject, req, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId); case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); @@ -369,6 +372,8 @@ public static boolean isValidDirectStorageIdentifier(String storageId) { return S3AccessIO.isValidIdentifier(driverId, storageId); case REMOTE: return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + case GLOBUS: + return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId); default: logger.warning("Request to validate for storage driver: " + driverId); } From 555bf05af241c555300f5c528656de3d10b3c584 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 30 Aug 2023 10:07:49 -0400 Subject: [PATCH 041/546] refactoring g store as a remotestore --- .../dataaccess/GlobusOverlayAccessIO.java | 370 +++--------------- .../dataaccess/RemoteOverlayAccessIO.java | 46 +-- .../iq/dataverse/settings/JvmSettings.java | 2 +- 3 files changed, 80 insertions(+), 338 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 6a22f8b68f3..16345cd1f9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,28 +58,26 @@ /* * Globus Overlay Driver * - * StorageIdentifier format: :///// + * Remote: + * StorageIdentifier format: ://// + * Storage location: / + * Internal + * StorageIdentifier format: ://// + * Storage location: /// + * + * baseUrl: globus:// + */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; 
- private String endpointWithBasePath = null; - - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; private String globusAccessToken = null; + /* + * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + */ + private boolean isDataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -104,9 +102,9 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE private void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(endpointWithBasePath + "/" + relPath); - if (!absoluteURI.normalize().toString().startsWith(endpointWithBasePath)) { - throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s endpoint/basePath"); + URI absoluteURI = new URI(baseUrl + "/" + relPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { + throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId); @@ -120,13 +118,24 @@ long retrieveSize() { URI absoluteURI = null; try { int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "") - + path.substring(0, filenameStart); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); logger.info("directoryPath: " + directoryPath); + + if(isDataverseManaged) { + Dataset ds = ((DataFile) dvObject).getOwner(); + directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + logger.info("directoryPath now: " + directoryPath); + + } + if(filenameStart > 0) { + directoryPath = directoryPath + path.substring(0, filenameStart); + } + logger.info("directoryPath finally: " + directoryPath); String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart - 1) : endpointWithBasePath; + String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint + "/ls?path=" + directoryPath + "&filter=name:" + filename); @@ -138,7 +147,7 @@ long retrieveSize() { if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); } else { @@ -159,63 +168,26 @@ long retrieveSize() { return -1; } - @Override - public InputStream getInputStream() throws IOException { - if (super.getInputStream() == null) { - try { - HttpGet get = new HttpGet(generateTemporaryDownloadUrl(null, null, null)); - CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); - int code = response.getStatusLine().getStatusCode(); - switch (code) { - case 200: - setInputStream(response.getEntity().getContent()); - break; - default: - logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + endpointWithBasePath + "/" + path + " code: " + code); - } - } catch (Exception e) { - logger.warning(e.getMessage()); - e.printStackTrace(); - throw new IOException("Error retrieving: " + endpointWithBasePath + "/" + path + " " + e.getMessage()); - } - setChannel(Channels.newChannel(super.getInputStream())); - } - return super.getInputStream(); - } - - @Override - public Channel getChannel() throws IOException { - if (super.getChannel() == null) { - getInputStream(); - } - return channel; - } - - @Override - public ReadableByteChannel getReadChannel() throws IOException { - // Make sure StorageIO.channel variable exists - getChannel(); - return super.getReadChannel(); - } @Override public void delete() throws IOException { + +// Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(endpointWithBasePath + "/" + path); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + endpointWithBasePath + "/" + path); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -224,7 +196,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + endpointWithBasePath + "/" + path); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -233,146 +205,20 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. - */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - - @Override - public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); - } - - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSize() != -1); - } - @Override - public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no write Channels associated with S3 objects."); - } - @Override - public 
OutputStream getOutputStream() throws UnsupportedDataAccessOperationException { - throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); - } - - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - - @Override - public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); - if ("true".equalsIgnoreCase(optionValue)) { - return true; - } - return false; - } - - public boolean downloadRedirectEnabled(String auxObjectTag) { - return baseStore.downloadRedirectEnabled(auxObjectTag); - } @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { - +//Fix // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); if (secretKey == null) { - return endpointWithBasePath + "/" + path; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(endpointWithBasePath + "/" + path, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { @@ -380,35 +226,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); + isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - endpointWithBasePath = System.getProperty("dataverse.files." + this.driverId + ".base-uri"); - logger.info("base-uri is " + endpointWithBasePath); - if (endpointWithBasePath == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-uri is required"); + baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + logger.info("base-url is " + baseUrl); + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); } else { try { - new URI(endpointWithBasePath); + new URI(baseUrl); } catch (Exception e) { logger.warning( "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); @@ -442,9 +274,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -469,9 +301,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: GlobusOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -488,97 +320,13 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. - private String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - - @Override - public void savePath(Path fileSystemPath) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: savePath() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream() not implemented in this storage driver."); - - } - - @Override - public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - throw new UnsupportedDataAccessOperationException( - "GlobusOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); - - } protected static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUri = System.getProperty("dataverse.files." + driverId + ".base-uri"); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { - URI absoluteURI = new URI(baseUri + "/" + urlPath); - if (!absoluteURI.normalize().toString().startsWith(baseUri)) { + URI absoluteURI = new URI(baseUrl + "/" + urlPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; } @@ -590,14 +338,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static String getBaseStoreIdFor(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".base-store"); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } public static void main(String[] args) { System.out.println("Running the main method"); @@ -606,7 +346,7 @@ public static void main(String[] args) { } // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); - System.setProperty("dataverse.files.globus.base-uri", "d8c42580-6528-4605-9ad8-116a61982644"); + System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", @@ -614,7 +354,7 @@ public static void main(String[] args) { System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URI.lookup("globus")); + logger.info(JvmSettings.BASE_URL.lookup("globus")); logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 710d7a38fb4..6b15bcf1dc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -52,31 +52,32 @@ /** * @author qqmyers - * @param what it stores */ /* * Remote Overlay Driver * * 
StorageIdentifier format: - * ://// + * ://// + * + * baseUrl: http(s):// */ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - private StorageIO baseStore = null; - private String path = null; - private String baseUrl = null; + protected StorageIO baseStore = null; + protected String path = null; + protected String baseUrl = null; - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + protected PoolingHttpClientConnectionManager cm = null; CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); @@ -86,7 +87,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { @@ -96,7 +97,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); - logger.fine("Base URL: " + path); + logger.fine("Relative path: " + path); } private void validatePath(String relPath) throws IOException { @@ -420,7 +421,8 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", + secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); @@ -483,9 +485,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } else if (storageLocation != null) { @@ -510,9 +512,9 @@ private void configureStores(DataAccessRequest req, String driverId, String stor + "/" + fullStorageLocation; break; default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + System.getProperty("dataverse.files." 
+ baseDriverId + ".type")); - throw new IOException("Not implemented"); + throw new IOException("Not supported"); } } baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); @@ -533,7 +535,7 @@ private void configureStores(DataAccessRequest req, String driverId, String stor // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific // prefix required. - private String getStoragePath() throws IOException { + protected String getStoragePath() throws IOException { String fullStoragePath = dvObject.getStorageIdentifier(); logger.fine("storageidentifier: " + fullStoragePath); int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); @@ -596,21 +598,21 @@ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementExcept @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: savePath() not implemented in this storage driver."); + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream() not implemented in this storage driver."); } @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { throw new UnsupportedDataAccessOperationException( - "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver."); + this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 451dbcc56d1..ffe08a6afb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), FILES(SCOPE_FILES), - BASE_URI(FILES, "base-uri"), + BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), // SOLR INDEX SETTINGS From 270e0fd0a28b516f62dc29e927bbb19753f47d19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:08:33 -0400 Subject: [PATCH 042/546] temporary fix for local compile issues --- .../harvest/server/web/servlet/OAIServlet.java | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 9cf1629abfc..3ce88fdf204 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -74,18 +74,13 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; - - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") - private Integer maxListIdentifiers; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") - private Integer maxListSets; + //Todo - revert this change - added to get past some local compile issues + private Integer 
maxListIdentifiers=100; + + private Integer maxListSets=100; - @Inject - @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") - private Integer maxListRecords; + private Integer maxListRecords=10; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist From 1828855a162683d564e02507ce60fd99963b43d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 12 Sep 2023 10:09:53 -0400 Subject: [PATCH 043/546] updates/fixes re: extending RemoteOverlay, etc. --- .../iq/dataverse/dataaccess/DataAccess.java | 2 + .../dataaccess/GlobusOverlayAccessIO.java | 208 +++++++----------- .../dataaccess/RemoteOverlayAccessIO.java | 9 +- .../iq/dataverse/dataaccess/StorageIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 52 +++-- 5 files changed, 119 insertions(+), 154 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index f2eb0236df4..8387f8110cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -132,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new SwiftAccessIO<>(storageLocation, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 16345cd1f9c..b00724e2825 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -58,46 +58,63 @@ /* * Globus Overlay Driver * - * Remote: - * StorageIdentifier format: ://// - * Storage location: / - * Internal - * StorageIdentifier format: ://// - * Storage location: /// + * Remote: StorageIdentifier format: + * ://// Storage location: + * / Internal StorageIdentifier format: + * :// Storage location: + * /// * * baseUrl: globus:// - + * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - - private String globusAccessToken = null; + String globusAccessToken = null; /* - * If this is set to true, the store supports Globus transfer in and Dataverse/the globus app manage file locations, access controls, deletion, etc. + * If this is set to true, the store supports Globus transfer in and + * Dataverse/the globus app manage file locations, access controls, deletion, + * etc. 
*/ - private boolean isDataverseManaged = false; + private boolean dataverseManaged = false; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - this.setIsLocalFile(false); - configureStores(req, driverId, null); - logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(path); + if (dvObject instanceof DataFile) { + globusAccessToken = retrieveGlobusAccessToken(); + } + dataverseManaged = isDataverseManaged(this.driverId); - logger.fine("Relative path: " + path); + logger.info("GAT3: " + globusAccessToken); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - super(null, null, driverId); - this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); + this.driverId = driverId; + this.dataverseManaged = isDataverseManaged(this.driverId); + if (dataverseManaged) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + configureStores(null, driverId, storageLocation); + + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); + } +//ToDo - only when needed? + globusAccessToken = retrieveGlobusAccessToken(); + + } + + private String retrieveGlobusAccessToken() { + // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); + String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); } private void validatePath(String relPath) throws IOException { @@ -114,6 +131,7 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override long retrieveSize() { + logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -121,16 +139,16 @@ long retrieveSize() { String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart+1) : ""); + String directoryPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("directoryPath: " + directoryPath); - if(isDataverseManaged) { + if (dataverseManaged && (dvObject!=null)) { Dataset ds = ((DataFile) dvObject).getOwner(); directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); logger.info("directoryPath now: " + directoryPath); } - if(filenameStart > 0) { + if (filenameStart > 0) { directoryPath = directoryPath + path.substring(0, filenameStart); } logger.info("directoryPath finally: " + directoryPath); @@ -168,12 +186,15 @@ long retrieveSize() { return -1; } - - - + + @Override + public InputStream getInputStream() throws IOException { + throw new IOException("Not implemented"); + } + @Override public void delete() throws IOException { - + // Fix // Delete is best-effort - we tell the remote server and it may or may not // implement this call @@ -205,9 +226,6 @@ public void delete() throws IOException { } - - - @Override public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { @@ -218,114 +236,37 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary if (secretKey == null) { return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", - secretKey); + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { return baseStore.generateTemporaryDownloadUrl(auxiliaryTag, auxiliaryType, auxiliaryFileName); } } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); - String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); - isDataverseManaged = Boolean.getBoolean("dataverse.files." + this.driverId + ".managed"); + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." + driverId + ".managed"); + } - AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); - globusAccessToken = accessToken.getOtherTokens().get(0).getAccessToken(); - // endpointWithBasePath = JvmSettings.BASE_URI.lookup(this.driverId); - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); - logger.info("base-url is " + baseUrl); + static boolean isValidIdentifier(String driverId, String storageId) { + String baseIdentifier = storageId.substring(storageId.lastIndexOf("//") + 2); + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); if (baseUrl == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - + return false; } - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type", - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + // Internally managed endpoints require standard name pattern (submitted via + // /addFile(s) api) + if (isDataverseManaged(driverId)) { + boolean hasStandardName = usesStandardNamePattern(baseIdentifier); + if (hasStandardName) { + return true; } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); + logger.warning("Unacceptable identifier pattern in submitted identifier: " + baseIdentifier); + return false; } } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); + // Remote endpoints require a valid URI within the baseUrl try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } - - - protected static boolean isValidIdentifier(String driverId, String storageId) { - String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); - String baseUrl = System.getProperty("dataverse.files." 
+ driverId + ".base-url"); - try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); + URI absoluteURI = new URI(baseUrl + "/" + baseIdentifier); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId); return false; @@ -338,7 +279,6 @@ protected static boolean isValidIdentifier(String driverId, String storageId) { return true; } - public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -347,15 +287,19 @@ public static void main(String[] args) { // System.setProperty("dataverse.files.globus.globus_client_id", // "2791b83e-b989-47c5-a7fa-ce65fd949522"); System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("Valid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + + System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); // System.setProperty("dataverse.files.globus.globus-token","Mjc5MWI4M2UtYjk4OS00N2M1LWE3ZmEtY2U2NWZkOTQ5NTIyOkt4ZEdndFVDUDVZZG5sRG4rRHEzaVMxTHBtTVRGNlB3RjlwWm9kRTBWNVE9"); System.setProperty("dataverse.files.globus.globus-token", "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); System.setProperty("dataverse.files.globus.base-store", "file"); System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.file.directory", "/tmp/files"); - logger.info(JvmSettings.BASE_URL.lookup("globus")); - logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); + // logger.info(JvmSettings.BASE_URL.lookup("globus")); + // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 6b15bcf1dc8..a9653f2ab68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,6 +65,8 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + String globusAccessToken = null; + protected StorageIO baseStore = null; protected String path = null; protected String baseUrl = null; @@ -79,6 +81,9 @@ public class RemoteOverlayAccessIO extends StorageIO { protected static boolean trustCerts = false; protected int httpConcurrency = 4; + public RemoteOverlayAccessIO() { + } + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); @@ -445,7 +450,7 @@ int getUrlExpirationMinutes() { return 60; } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { baseUrl = 
System.getProperty("dataverse.files." + this.driverId + ".base-url"); if (baseUrl == null) { throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); @@ -616,7 +621,7 @@ public void saveInputStream(InputStream inputStream, Long filesize) throws IOExc } - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2); String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index bfd5c5f0d8f..333d72e09b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -611,7 +611,7 @@ public static boolean isDirectUploadEnabled(String driverId) { //Check that storageIdentifier is consistent with store's config //False will prevent direct uploads - protected static boolean isValidIdentifier(String driverId, String storageId) { + static boolean isValidIdentifier(String driverId, String storageId) { return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5c387710844..d98e1c9b7f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -594,11 +594,10 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Starting an globusUpload "); - String datasetIdentifier = dataset.getStorageIdentifier(); - + // ToDo - use DataAccess methods? 
- String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); + //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); Thread.sleep(5000); @@ -670,18 +669,26 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin JsonArray filesJsonArray = jsonObject.getJsonArray("files"); if (filesJsonArray != null) { + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + //or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); @@ -690,7 +697,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - +logger.info("Size: " + newfilesJsonArray.size()); +logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -699,15 +707,21 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String fileName = fileJsonObject.getString("fileName"); String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + //If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if(bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; - + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + logger.info("List Size: " + newfileJsonObject.size()); + //if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { JsonPatch path = Json.createPatchBuilder() 
.add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); fileJsonObject = path.apply(fileJsonObject); @@ -716,11 +730,11 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin fileJsonObject = path.apply(fileJsonObject); jsonDataSecondAPI.add(fileJsonObject); countSuccess++; - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing values "); + // countError++; + // } } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -1045,8 +1059,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath - + ") does not appear to be an S3 object associated with driver: "); + globusLogger.info("DataFile (fullPAth " + fullPath + + ") does not appear to be accessible withing Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); From 3d2255b963f869028b68576075462664f67a5888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 13 Sep 2023 18:35:40 +0200 Subject: [PATCH 044/546] Assign roles from email address Give a user a role from email address of the user's account --- .../iq/dataverse/authorization/users/AuthenticatedUser.java | 3 ++- src/main/webapp/roles-assign.xhtml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 89429b912f6..17db9e63e8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -64,7 +64,8 @@ @NamedQuery( name="AuthenticatedUser.filter", query="select au from AuthenticatedUser au WHERE (" + "LOWER(au.userIdentifier) like LOWER(:query) OR " - + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query))"), + + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query) or " + + "lower(au.email) like lower(:query))"), @NamedQuery( name="AuthenticatedUser.findAdminUser", query="select au from AuthenticatedUser au WHERE " + "au.superuser = true " diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml index 4b31f10dbfc..4b355c74d5c 100644 --- a/src/main/webapp/roles-assign.xhtml +++ b/src/main/webapp/roles-assign.xhtml @@ -31,7 +31,8 @@ styleClass="DropdownPopup" panelStyleClass="DropdownPopupPanel" var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter"> - + + From ae16dadddd7978dae23dd62671c05433db2f9300 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:13:52 -0400 Subject: [PATCH 045/546] minor cleanup --- .../iq/dataverse/globus/GlobusServiceBean.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 56219f843a7..9aae4dffc03 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -130,7 +130,7 @@ ArrayList checkPermisions(AccessToken clientTokenUser, String directory, return ids; } - +/* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -163,8 +163,8 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin count++; } } - - public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { +*/ + public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { if (ruleId.length() > 0) { AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); @@ -554,7 +554,9 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) + rawStorageId + "&fileName=" + df.getCurrentName(); } } - return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + String finalUrl = tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + logger.info("Calling app: " + finalUrl); + return finalUrl; } public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @@ -624,7 +626,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } // If success, switch to an EditInProgress lock - do this before removing the @@ -897,7 +899,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + deletePermission(ruleId, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { From 9562b788b7dfbfec53d6d7e9aeb52e690cddddf4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 18 Sep 2023 10:14:43 -0400 Subject: [PATCH 046/546] start allowupload method, fix messaging when disabled --- .../harvard/iq/dataverse/api/Datasets.java | 63 ++++++++++++++++++- src/main/java/propertyFiles/Bundle.properties | 3 + 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 599890913fd..a999a71b2d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3393,6 +3393,65 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** Requests permissions for a given globus user to upload to the dataset + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ + @POST + @AuthRequired + @Path("{id}/allowGlobusUpload") + @Consumes(MediaType.APPLICATION_JSON) + public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + ) throws IOException, ExecutionException, InterruptedException { + + + logger.info(" ==== (api allowGlobusUpload) jsonBody ====== " + jsonBody); + + + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } + + // 
------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + User authUser; + authUser = getRequestUser(crc); + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + // Async Call + globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + + return ok("Async call to Globus Download started"); + + } + + /** Monitors a globus download and removes permissions on the dir/dataset when done + * + * @param crc + * @param datasetId + * @param jsonData + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired @Path("{id}/deleteglobusRule") @@ -3404,8 +3463,8 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + if (!systemConfig.isGlobusDownload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); } // ------------------------------------- diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 997f0470cc3..0343e109e61 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2623,6 +2623,9 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. + #Dataverses.java From c6197b3bf23ad1dccb023ea668799e7a79805d93 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Mon, 18 Sep 2023 10:40:05 -0400 Subject: [PATCH 047/546] #9920 support Postgres 16 --- pom.xml | 4 ++-- scripts/installer/install.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 7ba22d2a076..c5b7fc302f3 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ war 1.2.18.4 - 9.21.2 + 9.22.1 1.20.1 0.8.7 5.2.1 @@ -790,7 +790,7 @@ true docker-build - 13 + 16 gdcc/dataverse:${app.image.tag} unstable diff --git a/scripts/installer/install.py b/scripts/installer/install.py index 5a7b9f75696..18995695638 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -422,9 +422,13 @@ conn.close() if int(pg_major_version) >= 15: + admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'" + conn = psycopg2.connect(admin_conn_string) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";" - print("PostgreSQL 15 or higher detected. Running " + conn_cmd) try: + print("PostgreSQL 15 or higher detected. 
Running " + conn_cmd) cur.execute(conn_cmd) except: if force: From 116845c753a8364d14bad2edafcebf6a0e28dde6 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 19 Sep 2023 15:09:11 -0400 Subject: [PATCH 048/546] refactoring, add allowUpload api call --- .../harvard/iq/dataverse/api/Datasets.java | 7 +- .../dataaccess/GlobusOverlayAccessIO.java | 2 +- .../iq/dataverse/globus/GlobusEndpoint.java | 31 ++++++ .../dataverse/globus/GlobusServiceBean.java | 104 ++++++++++++------ 4 files changed, 109 insertions(+), 35 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a999a71b2d4..745f294fee6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3434,11 +3434,14 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); // Async Call - globusService.givePermission(jsonBody, jsonBody, jsonBody, null, datasetId, jsonBody).globusDownload(jsonData, dataset, authUser); + globusService.givePermission("identity", principal, "rw", dataset); - return ok("Async call to Globus Download started"); + return ok("Permission Granted"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index b18e6bb7e76..965dc3c0947 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -214,7 +214,7 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - private static boolean isDataverseManaged(String driverId) { + public static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java new file mode 100644 index 00000000000..d1e5d19a592 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -0,0 +1,31 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusEndpoint { + + private String id; + private String clientToken; + private String basePath; + + + public GlobusEndpoint(String id, String clientToken, String basePath) { + + } + public String getId() { + return id; + } + public void setId(String id) { + this.id = id; + } + public String getClientToken() { + return clientToken; + } + public void setClientToken(String clientToken) { + this.clientToken = clientToken; + } + public String getBasePath() { + return basePath; + } + public void setBasePath(String basePath) { + this.basePath = basePath; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9aae4dffc03..910ee796e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -46,6 +46,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -106,23 +107,23 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, - String principalType, String principal) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list"); + private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + endpoint.getClientToken(), "GET", null); ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { ids.add(pr.getId()); } else { - logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + logger.info(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } @@ -185,24 +186,24 @@ public void deletePermission(String ruleId, Logger globusLogger) throws Malforme } - public int givePermission(String principalType, String principal, String perm, AccessToken 
clientTokenUser, - String directory, String globusEndpoint) throws MalformedURLException { + public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + ArrayList rules = checkPermissions(endpoint, principalType, principal); Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/"); + permissions.setPath(endpoint.getBasePath() + "/"); permissions.setPermissions(perm); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; if (rules.size() == 0) { logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); if (result.status == 400) { @@ -214,9 +215,9 @@ public int givePermission(String principalType, String principal, String perm, A return result.status; } else { logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access/" + rules.get(0)); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "PUT", gson.toJson(permissions)); if (result.status == 400) { @@ -438,36 +439,25 @@ static class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) + private MakeRequestResponse findDirectory(String directory, String clientToken, String globusEndpoint) throws MalformedURLException { URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" + directory + "/"); MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + clientToken, "GET", null); logger.info("find directory status:" + result.status); return result; } - public boolean giveGlobusPublicPermissions(String datasetId) + public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - if (globusEndpoint.equals("") || globusBasicToken.equals("")) { - return false; - } - AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - if (clientTokenUser == null) { - logger.severe("Cannot get client token "); - return false; - } + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - String directory = getDirectory(datasetId); - logger.info(directory); - MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); + 
MakeRequestResponse status = findDirectory(endpoint.getBasePath(), endpoint.getClientToken(), endpoint.getId()); if (status.status == 200) { @@ -485,8 +475,7 @@ public boolean giveGlobusPublicPermissions(String datasetId) * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } */ - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, - globusEndpoint); + int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { logger.info("Permissions already exist or limit was reached"); @@ -1287,4 +1276,55 @@ public String calculatemime(String fileName) throws InterruptedException { * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } * */ + + GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + Dataset dataset = null; + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + dataset = (Dataset) dvObject.getOwner(); + } else { + throw new IllegalArgumentException("Unsupported DvObject type: " + dvObject.getClass().getName()); + } + String driverId = dataset.getEffectiveStorageDriverId(); + GlobusEndpoint endpoint = null; + String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url"); + + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("directoryPath: " + directoryPath); + + if (GlobusOverlayAccessIO.isDataverseManaged(driverId) && (dataset!=null)) { + directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); + logger.info("directoryPath now: " + directoryPath); + + } else { + //remote store - may have path in file storageidentifier + String relPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + int filenameStart = relPath.lastIndexOf("/") + 1; + if (filenameStart > 0) { + directoryPath = directoryPath + relPath.substring(0, filenameStart); + } + } + logger.info("directoryPath finally: " + directoryPath); + + String endpointId = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + + logger.info("endpointId: " + endpointId); + + String globusToken = System.getProperty("dataverse.files." + driverId + ".globus-token"); + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); + + endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); + + return endpoint; + } + + private static boolean isDataverseManaged(String driverId) { + return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); + } } From c0dacb50fb117f01639b22bae6b404c6cc71596b Mon Sep 17 00:00:00 2001 From: Ludovic DANIEL Date: Wed, 20 Sep 2023 16:59:25 +0200 Subject: [PATCH 049/546] #9940 - fixed various issues with generated urls of authors for signposting --- .../dataverse/util/SignpostingResources.java | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java index 2c9b7167059..19e1c1298ae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -19,6 +19,8 @@ Two configurable options allow changing the limit for the number of authors or d import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; +import org.apache.commons.validator.routines.UrlValidator; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -164,12 +166,11 @@ private List getAuthorURLs(boolean limit) { for (DatasetAuthor da : workingDatasetVersion.getDatasetAuthors()) { logger.fine(String.format("idtype: %s; idvalue: %s, affiliation: %s; identifierUrl: %s", da.getIdType(), da.getIdValue(), da.getAffiliation(), da.getIdentifierAsUrl())); - String authorURL = ""; - authorURL = getAuthorUrl(da); + String authorURL = getAuthorUrl(da); if (authorURL != null && !authorURL.isBlank()) { // return empty if number of visible author more than max allowed // >= since we're comparing before incrementing visibleAuthorCounter - if (visibleAuthorCounter >= maxAuthors) { + if (limit && visibleAuthorCounter >= maxAuthors) { authorURLs.clear(); break; } @@ -211,15 +212,22 @@ private String getAuthorsAsString(List datasetAuthorURLs) { * */ private String getAuthorUrl(DatasetAuthor da) { - String authorURL = ""; - //If no type and there's a value, assume it is a URL (is this reasonable?) 
- //Otherise, get the URL using the type and value - if (da.getIdType() != null && !da.getIdType().isBlank() && da.getIdValue()!=null) { - authorURL = da.getIdValue(); - } else { - authorURL = da.getIdentifierAsUrl(); + + final String identifierAsUrl = da.getIdentifierAsUrl(); + // First, try to get URL using the type and value + if(identifierAsUrl != null) { + return identifierAsUrl; } - return authorURL; + + final String idValue = da.getIdValue(); + UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"}); + // Otherwise, try to use idValue as url if it's valid + if(urlValidator.isValid(idValue)) { + return idValue; + } + + // No url found + return null; } private JsonArrayBuilder getJsonAuthors(List datasetAuthorURLs) { From 9d846d2455e820cc9312863079086c66b0799c7a Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:13 +0200 Subject: [PATCH 050/546] fix: require ManageDatasetPermissions for listing role assignments on datasets --- .../engine/command/impl/ListRoleAssignments.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java index 1858ba377ab..b619d32cc7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java @@ -6,16 +6,18 @@ import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import java.util.ArrayList; import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Collections; /** * * @author michael */ -@RequiredPermissions( Permission.ManageDataversePermissions ) +// no annotations here, since permissions are dynamically decided public class ListRoleAssignments extends AbstractCommand> { private final DvObject definitionPoint; @@ -34,5 +36,12 @@ public List execute(CommandContext ctxt) throws CommandException } return ctxt.permissions().assignmentsOn(definitionPoint); } + + @Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + definitionPoint.isInstanceofDataset() ? 
Collections.singleton(Permission.ManageDatasetPermissions) + : Collections.singleton(Permission.ManageDataversePermissions)); + } } From 41e363e343861f6b416e6add60e60778f697cce0 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Tue, 26 Sep 2023 09:13:36 +0200 Subject: [PATCH 051/546] test: require ManageDatasetPermissions for listing role assignments on datasets --- scripts/api/data/role-contributor-plus.json | 12 +++ .../harvard/iq/dataverse/api/DatasetsIT.java | 87 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json new file mode 100644 index 00000000000..ef9ba3aaff6 --- /dev/null +++ b/scripts/api/data/role-contributor-plus.json @@ -0,0 +1,12 @@ +{ + "alias":"contributorPlus", + "name":"ContributorPlus", + "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", + "permissions":[ + "ViewUnpublishedDataset", + "EditDataset", + "DownloadFile", + "DeleteDatasetDraft", + "ManageDatasetPermissions" + ] +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 3b6d4d1ecdf..b51d400d2d4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1296,6 +1296,93 @@ public void testAddRoles(){ } + @Test + public void testListRoleAssignments() { + Response createAdminUser = UtilIT.createRandomUser(); + String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser); + String adminApiToken = UtilIT.getApiTokenFromResponse(createAdminUser); + UtilIT.makeSuperUser(adminUsername); + + Response createDataverseResponse = UtilIT.createRandomDataverse(adminApiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + // Now, let's allow anyone with a Dataverse account (any "random user") + // to create datasets in this dataverse: + + Response grantRole = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, AuthenticatedUsers.get().getIdentifier(), adminApiToken); + grantRole.prettyPrint(); + assertEquals(OK.getStatusCode(), grantRole.getStatusCode()); + + Response createContributorUser = UtilIT.createRandomUser(); + String contributorUsername = UtilIT.getUsernameFromResponse(createContributorUser); + String contributorApiToken = UtilIT.getApiTokenFromResponse(createContributorUser); + + // First, we test listing role assignments on a dataverse which requires "ManageDataversePermissions" + + Response notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + Response roleAssignmentsOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, adminApiToken); + roleAssignmentsOnDataverse.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataverse.getStatusCode()); + + // Second, we test listing role assignments on a dataset which requires "ManageDatasetPermissions" + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, contributorApiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + 
logger.info("dataset id: " + datasetId); + + Response datasetAsJson = UtilIT.nativeGet(datasetId, adminApiToken); + datasetAsJson.then().assertThat() + .statusCode(OK.getStatusCode()); + + String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + assertEquals(10, identifier.length()); + + String protocol1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.protocol"); + String authority1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.authority"); + String identifier1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); + String datasetPersistentId = protocol1 + ":" + authority1 + "/" + identifier1; + + Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); + + // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments + // of a dataset and assign it to the contributor user + + String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; + Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); + addDataverseRoleResponse.prettyPrint(); + String body = addDataverseRoleResponse.getBody().asString(); + String status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + giveRandoPermission.prettyPrint(); + assertEquals(200, giveRandoPermission.getStatusCode()); + + // Contributor user should now be able to list dataset role assignments as well + + Response roleAssignmentsOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); + roleAssignmentsOnDataset.prettyPrint(); + assertEquals(OK.getStatusCode(), roleAssignmentsOnDataset.getStatusCode()); + + // ...but not dataverse role assignments + + notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); + assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); + + // Finally, we clean up and delete the role we created + + Response deleteDataverseRoleResponse = UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); + deleteDataverseRoleResponse.prettyPrint(); + body = deleteDataverseRoleResponse.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("OK", status); + } + @Test public void testFileChecksum() { From a8883981daa5d84d4553150804fe59942886d069 Mon Sep 17 00:00:00 2001 From: Eryk Kulikowski Date: Thu, 28 Sep 2023 13:36:19 +0200 Subject: [PATCH 052/546] always_add_validity_field_to_solr_doc --- .../edu/harvard/iq/dataverse/search/IndexServiceBean.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..04bc824c4b1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -811,9 +811,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set Date: Tue, 3 Oct 2023 17:03:38 -0400 Subject: [PATCH 
053/546] update auth checks and err handling --- .../harvard/iq/dataverse/api/Datasets.java | 41 +++++++++++++++---- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 110bfcc1553..25839544ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -65,6 +65,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -3429,8 +3430,12 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar // ------------------------------------- // (1) Get the user from the ContainerRequestContext // ------------------------------------- - User authUser; - authUser = getRequestUser(crc); + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } // ------------------------------------- // (2) Get the Dataset Id @@ -3442,14 +3447,32 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar } catch (WrappedResponse wr) { return wr.getResponse(); } - - JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - // Async Call - globusService.givePermission("identity", principal, "rw", dataset); - - return ok("Permission Granted"); + if(!GlobusOverlayAccessIO.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + return badRequest("This dataset does not have managed Globus storage"); + } + + if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) + .canIssue(UpdateDatasetVersionCommand.class)) { + + JsonObject params = JsonUtil.getJsonObject(jsonBody); + String principal = params.getString("principal"); + + // Async Call + int status = globusService.givePermission("identity", principal, "rw", dataset); + switch (status) { + case 201: + return ok("Permission Granted"); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + } else { + return forbidden("User doesn't have permission to upload to this dataset"); + } } From c724094dcfffaa83c61f415d572e2e5a8958cef0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:03:57 -0400 Subject: [PATCH 054/546] fix constructor, reformat --- .../iq/dataverse/globus/GlobusEndpoint.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java index d1e5d19a592..7e555935e2e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -5,27 +5,34 @@ public class GlobusEndpoint { private String id; private String clientToken; private String basePath; - - + public GlobusEndpoint(String id, String clientToken, String basePath) { - + this.id = id; + this.clientToken = clientToken; + this.basePath = 
basePath; } - public String getId() { + + public String getId() { return id; } - public void setId(String id) { + + public void setId(String id) { this.id = id; } - public String getClientToken() { + + public String getClientToken() { return clientToken; } - public void setClientToken(String clientToken) { + + public void setClientToken(String clientToken) { this.clientToken = clientToken; } - public String getBasePath() { + + public String getBasePath() { return basePath; } - public void setBasePath(String basePath) { + + public void setBasePath(String basePath) { this.basePath = basePath; } } \ No newline at end of file From ed87e0640788278b5af838ba98efd72413d2586d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 3 Oct 2023 17:04:51 -0400 Subject: [PATCH 055/546] start to monitor access rule changes --- .../dataverse/globus/GlobusServiceBean.java | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 910ee796e0e..ad20b90971b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -206,10 +206,21 @@ public int givePermission(String principalType, String principal, String perm, D result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); - if (result.status == 400) { + switch (result.status) { + case 400: + logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { + break; + case 409: logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + break; + case 201: + JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); + if (response != null && response.containsKey("access_id")) { + permissions.setId(response.getString("access_id")); + monitorTemporaryPermissions(permissions, endpoint); + logger.info("Access rule " + permissions.getId() + " was created successfully"); + } } return result.status; @@ -226,9 +237,13 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); } logger.info("Result status " + result.status); + return result.status; } + } - return result.status; + private void monitorTemporaryPermissions(Permissions permissions, GlobusEndpoint endpoint) { + // TODO Auto-generated method stub + } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -324,6 +339,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a // Basic // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); + logger.info("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -333,6 +349,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a connection.setRequestProperty("Accept", "application/json"); logger.info(jsonString); connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); wr.write(jsonString); wr.flush(); @@ -1318,7 +1335,7 @@ GlobusEndpoint 
getGlobusEndpoint(DvObject dvObject) { AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - +logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; From 6f464bc4697e5b1aee280d4d963c644ca7a80dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20ROUCOU?= Date: Wed, 4 Oct 2023 17:58:06 +0200 Subject: [PATCH 056/546] Revert print email on modal --- src/main/webapp/roles-assign.xhtml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml index 4b355c74d5c..93b9862c55d 100644 --- a/src/main/webapp/roles-assign.xhtml +++ b/src/main/webapp/roles-assign.xhtml @@ -32,7 +32,6 @@ var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter"> - From 4c67f2a636699d51589fa815511ce4e1b3dc9d1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 6 Oct 2023 12:13:19 -0400 Subject: [PATCH 057/546] remove inefficient bucket check --- .../iq/dataverse/dataaccess/S3AccessIO.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..22216ee5c2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -108,14 +108,13 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); } - //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. - try { - if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { - throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); - } - } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); - } + + // FWIW: There used to be a check here to see if the bucket exists. + // It was very redundant (checking every time we access any file) and didn't do + // much but potentially make the failure (in the unlikely case a bucket doesn't + // exist/just disappeared) happen slightly earlier (here versus at the first + // file/metadata access). 
+ } catch (Exception e) { throw new AmazonClientException( "Cannot instantiate a S3 client; check your AWS credentials and region", From 90dfa42c9090ce9e4cf9dab1e8ed57776137a077 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 11 Oct 2023 16:41:38 -0400 Subject: [PATCH 058/546] Redesigned provider mechanism --- .../iq/dataverse/dataaccess/S3AccessIO.java | 71 ++++++++++++++----- 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 22216ee5c2b..ee04bbcb853 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; @@ -57,9 +58,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.function.Predicate; import java.util.logging.Logger; @@ -1180,29 +1183,59 @@ private static AmazonS3 getClient(String driverId) { // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + /** Configure credentials for the S3 client. There are multiple mechanisms available. + * Role-based/instance credentials are globally defined while the other mechanisms (profile, static) + * are defined per store. The logic below assures that + * * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials. + * * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials, + * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and + * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default"); - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.files." 
+ driverId + ".secret-key", String.class).orElse("") - )); - - //Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + ArrayList providers = new ArrayList<>(); + + String s3profile = System.getProperty("dataverse.files." + driverId + ".profile"); + boolean allowInstanceCredentials = true; + // Assume that instance credentials should not be used if the profile is + // actually set for this store or if static creds are provided (below). + if (s3profile != null) { + allowInstanceCredentials = false; + } + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env vars or system properties to provide these, but + // use the secrets config source provided by Payara. + Optional accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class); + Optional secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class); + if (accessKey.isPresent() && secretKey.isPresent()) { + allowInstanceCredentials = false; + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + accessKey.orElse(""), + secretKey.orElse(""))); + providers.add(staticCredentials); + } else if (s3profile == null) { + //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). + s3profile = "default"; + } + if (s3profile != null) { + ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); + providers.add(profileCredentials); + } + + if (allowInstanceCredentials) { + // Add role-based provider as in the default provider chain + InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + providers.add(instanceCredentials); + } // Add all providers to chain - the first working provider will be used - // (role-based is first in the default cred provider chain, so we're just + // (role-based is first in the default cred provider chain (if no profile or + // static creds are explicitly set for the store), so we're just // reproducing that, then profile, then static credentials as the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials); + + // As the order is the reverse of how we added providers, we reverse the list here + Collections.reverse(providers); + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers); s3CB.setCredentials(providerChain); - + // let's build the client :-) AmazonS3 client = s3CB.build(); driverClientMap.put(driverId, client); From dcca52566958fba3f58698766f9696723fcebfc0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 12 Oct 2023 09:28:42 -0400 Subject: [PATCH 059/546] Good cleanup Co-authored-by: Oliver Bertuch --- .../harvard/iq/dataverse/dataaccess/S3AccessIO.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index ee04bbcb853..a66686ac648 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1209,22 +1209,20 @@ private static AmazonS3 getClient(String driverId) { 
allowInstanceCredentials = false; AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( new BasicAWSCredentials( - accessKey.orElse(""), - secretKey.orElse(""))); + accessKey.get(), + secretKey.get())); providers.add(staticCredentials); } else if (s3profile == null) { //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). s3profile = "default"; } if (s3profile != null) { - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - providers.add(profileCredentials); + providers.add(new ProfileCredentialsProvider(s3profile)); } if (allowInstanceCredentials) { // Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); - providers.add(instanceCredentials); + providers.add(InstanceProfileCredentialsProvider.getInstance()); } // Add all providers to chain - the first working provider will be used // (role-based is first in the default cred provider chain (if no profile or From 4ad95697405512c16ec42b1d242ce620aec2436a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:32:13 -0400 Subject: [PATCH 060/546] partial changes for permission mgmt, etc. --- .../harvard/iq/dataverse/api/Datasets.java | 33 ++- .../dataverse/globus/GlobusServiceBean.java | 218 +++++++++++------- .../iq/dataverse/settings/JvmSettings.java | 1 + 3 files changed, 155 insertions(+), 97 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 25839544ce9..d3ea1b80696 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3363,6 +3363,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } //------------------------------------ // (2b) Make sure dataset does not have package file @@ -3396,7 +3405,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); return ok("Async call to Globus Upload started "); @@ -3414,9 +3423,10 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, */ @POST @AuthRequired - @Path("{id}/allowGlobusUpload") + @Path("{id}/requestGlobusTransferPaths") @Consumes(MediaType.APPLICATION_JSON) - public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody + @Produces(MediaType.APPLICATION_JSON) + public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody ) throws IOException, ExecutionException, InterruptedException { @@ -3454,15 +3464,18 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - + 
try { JsonObject params = JsonUtil.getJsonObject(jsonBody); String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if(numberOfPaths <=0) { + return badRequest("numberOfFiles must be positive"); + } - // Async Call - int status = globusService.givePermission("identity", principal, "rw", dataset); - switch (status) { + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { case 201: - return ok("Permission Granted"); + return ok(response.getJsonArray("paths")); case 400: return badRequest("Unable to grant permission"); case 409: @@ -3470,6 +3483,10 @@ public Response allowGlobusUpload(@Context ContainerRequestContext crc, @PathPar default: return error(null, "Unexpected error when granting permission"); } + } catch (NullPointerException|ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } else { return forbidden("User doesn't have permission to upload to this dataset"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ad20b90971b..49572519696 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,7 +1,11 @@ package edu.harvard.iq.dataverse.globus; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; +import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; + import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -15,7 +19,9 @@ import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -29,6 +35,8 @@ import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -48,6 +56,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -107,8 +116,10 @@ public void setUserTransferToken(String userTransferToken) { this.userTransferToken = userTransferToken; } - private ArrayList checkPermissions(GlobusEndpoint endpoint, String principalType, String principal) throws MalformedURLException { + private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { + String principalType="identity"; + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); @@ -118,20 +129,22 @@ private ArrayList checkPermissions(GlobusEndpoint endpoint, String princ for (int i 
= 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) - && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { - ids.add(pr.getId()); + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) + &&pr.getPermissions().equals(permissions)) { + return pr.getId(); } else { - logger.info(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); + logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } } - - return ids; + return null; } -/* + + /* public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) throws MalformedURLException { if (directory != null && !directory.equals("")) { @@ -165,47 +178,71 @@ public void updatePermision(AccessToken clientTokenUser, String directory, Strin } } */ - public void deletePermission(String ruleId, Logger globusLogger) throws MalformedURLException { - - if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "")); - - globusLogger.info("Start deleting permissions."); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - - URL url = new URL( - "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + +/** Call to delete a globus rule related to the specified dataset. 
+ * + * @param ruleId - Globus rule id - assumed to be associated with the dataset's file path (should not be called with a user specified rule id w/o further checking) + * @param datasetId - the id of the dataset associated with the rule + * @param globusLogger - a separate logger instance, may be null + */ +public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { + + if (ruleId.length() > 0) { + if (dataset != null) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + if (endpoint != null) { + String accessToken = endpoint.getClientToken(); + if (globusLogger != null) { + globusLogger.info("Start deleting permissions."); + } + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null); + if (result.status != 200) { + if (globusLogger != null) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + // When removed due to a cache ejection, we don't have a globusLogger + logger.warning("Cannot delete access rule " + ruleId); + } + } else { + if (globusLogger != null) { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } + } catch (MalformedURLException ex) { + logger.log(Level.WARNING, + "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex); + } } } - } +} - public int givePermission(String principalType, String principal, String perm, Dataset dataset) throws MalformedURLException { + public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - ArrayList rules = checkPermissions(endpoint, principalType, principal); + String principalType= "identity"; Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); permissions.setPath(endpoint.getBasePath() + "/"); - permissions.setPermissions(perm); + permissions.setPermissions("rw"); Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; - if (rules.size() == 0) { logger.info("Start creating the rule"); + JsonObjectBuilder response = Json.createObjectBuilder(); + + try { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); + response.add("status", result.status); switch (result.status) { case 400: @@ -215,35 +252,50 @@ public int givePermission(String principalType, String principal, String perm, D logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); break; case 201: - JsonObject response = JsonUtil.getJsonObject(result.jsonResponse); - if (response != null && response.containsKey("access_id")) { - permissions.setId(response.getString("access_id")); - monitorTemporaryPermissions(permissions, endpoint); + JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse); + if (globusResponse != null && globusResponse.containsKey("access_id")) { + permissions.setId(globusResponse.getString("access_id")); + monitorTemporaryPermissions(permissions.getId(), dataset.getId()); logger.info("Access rule " + permissions.getId() + " was created successfully"); + JsonArrayBuilder pathArray = Json.createArrayBuilder(); + for(int i=0;i rulesCache = Caffeine.newBuilder() + 
.expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .removalListener((ruleId, datasetId, cause) -> { + //Delete rules that expire + Dataset dataset = datasetSvc.find(datasetId); + deletePermission((String) ruleId, dataset, null); + }) + + .build(); + + + private void monitorTemporaryPermissions(String ruleId, long datasetId) { + rulesCache.put(ruleId, datasetId); } public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { @@ -468,6 +520,7 @@ private MakeRequestResponse findDirectory(String directory, String clientToken, return result; } + /* public boolean giveGlobusPublicPermissions(Dataset dataset) throws UnsupportedEncodingException, MalformedURLException { @@ -478,20 +531,6 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) if (status.status == 200) { - /* - * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: - * files) { if (!file.getName().contains("cached") && - * !file.getName().contains(".thumb")) { int perStatus = - * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory - * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status " - * + perStatus + " for " + file.getName()); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached for " + - * file.getName()); } else if (perStatus == 400) { - * logger.info("No file in Globus " + file.getName()); } else if (perStatus != - * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } - */ - int perStatus = givePermission("all_authenticated_users", "", "r", dataset); logger.info("givePermission status " + perStatus); if (perStatus == 409) { @@ -512,7 +551,8 @@ public boolean giveGlobusPublicPermissions(Dataset dataset) return true; } - +*/ + // Generates the URL to launch the Globus app public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); @@ -572,7 +612,7 @@ public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; @@ -606,33 +646,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // ToDo - use DataAccess methods? //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); + + logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); - Thread.sleep(5000); - - JsonObject jsonObject = null; - try (StringReader rdr = new StringReader(jsonData)) { - jsonObject = Json.createReader(rdr).readObject(); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. 
Json: {0}"); - } - logger.info("json: " + JsonUtil.prettyPrint(jsonObject)); - - String taskIdentifier = jsonObject.getString("taskIdentifier"); + String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = ""; - try { - ruleId = jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object"); - } + String ruleId = null; + Thread.sleep(5000); + // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermission(ruleId, globusLogger); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + + ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { + Long datasetId = rulesCache.getIfPresent(ruleId); + if(datasetId!=null) { + + //Will delete rule + rulesCache.invalidate(ruleId); + } else { + //The cache already expired this rule, in which case it's delay not long enough, or we have some other problem + logger.warning("Rule " + ruleId + " not found in rulesCache"); + deletePermission(ruleId, dataset, globusLogger); + } } // If success, switch to an EditInProgress lock - do this before removing the @@ -674,7 +714,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + JsonArray filesJsonArray = jsonData.getJsonArray("files"); if (filesJsonArray != null) { String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); @@ -905,7 +945,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro String taskStatus = getTaskStatus(task); if (ruleId.length() > 0) { - deletePermission(ruleId, globusLogger); + deletePermission(ruleId, dataset, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index b4807372b69..f8abe505dca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -48,6 +48,7 @@ public enum JvmSettings { SCOPE_FILES(PREFIX, "files"), FILES_DIRECTORY(SCOPE_FILES, "directory"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), + GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), From 30395309689949a3fc633e3be5fa4c30cc1f27cd Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 16 Oct 2023 16:33:02 -0400 Subject: [PATCH 061/546] check driver type not id --- .../java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index f4cc7d40120..3bc83538679 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -606,7 +606,7 @@ public static String getDriverPrefix(String driverId) { } public static boolean isDirectUploadEnabled(String driverId) { - return (DataAccess.S3.equals(driverId) && Boolean.parseBoolean(System.getProperty("dataverse.files." 
+ DataAccess.S3 + ".upload-redirect"))) || + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } From 48144a24cb200e285b5419ab29865293eac17e54 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 16 Oct 2023 17:00:51 -0400 Subject: [PATCH 062/546] adding extra logic to skip things like facets and highlights in searches, unless specifically requested. (#9635) --- .../search/SearchIncludeFragment.java | 91 ++++-- .../dataverse/search/SearchServiceBean.java | 308 +++++++++++------- 2 files changed, 249 insertions(+), 150 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..1e42958fe4e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -120,7 +120,6 @@ public class SearchIncludeFragment implements java.io.Serializable { private Long facetCountDatasets = 0L; private Long facetCountFiles = 0L; Map previewCountbyType = new HashMap<>(); - private SolrQueryResponse solrQueryResponseAllTypes; private String sortField; private SortOrder sortOrder; private String currentSort; @@ -132,6 +131,7 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; + private boolean solrIsOverloaded = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -279,6 +279,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; + SolrQueryResponse solrQueryResponseSecondPass = null; List filterQueriesFinal = new ArrayList<>(); @@ -311,18 +312,11 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - List filterQueriesFinalAllTypes = new ArrayList<>(); - String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); - selectedTypesHumanReadable = combine(arr, " OR "); - if (!selectedTypesHumanReadable.isEmpty()) { - typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + filterQueriesFinal.addAll(filterQueries); - filterQueriesFinalAllTypes.addAll(filterQueriesFinal); - String allTypesFilterQuery = SearchFields.TYPE + ":(dataverses OR datasets OR files)"; - filterQueriesFinalAllTypes.add(allTypesFilterQuery); + filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -363,10 +357,60 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) 
// To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); - if (solrQueryResponse.hasError()){ - logger.info(solrQueryResponse.getError()); - setSolrErrorEncountered(true); + // Sure, but we should not waste resources here. We will try to save + // solr some extra work and a) only run this second query IF there is + // one or more unselected type facets; and b) drop all the extra + // parameters from this second query - such as facets and highlights - + // that we do not actually need for the purposes of finding these + // extra numbers. -- L.A. 10/16/2023 + + // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), -1L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), -1L); + + + // This will populate the type facet counts for the types that are + // currently selected on the collection page: + for (FacetCategory facetCategory : solrQueryResponse.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + + if (selectedTypesList.size() < 3) { + // If some types are NOT currently selected, we will need to + // run another query to obtain the numbers of the unselected types: + + List filterQueriesFinalSecondPass = new ArrayList<>(); + filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + + List selectedTypesListSecondPass = new ArrayList<>(); + + for (String dvObjectType : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectType) == -1) { + selectedTypesListSecondPass.add(dvObjectType); + } + } + + String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + + if (solrQueryResponseSecondPass != null) { + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); + if (solrQueryResponseSecondPass.hasError()) { + logger.info(solrQueryResponse.getError()); + setSolrErrorEncountered(true); + } + + // And now populate the remaining type facets: + for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + } } } catch (SearchException ex) { @@ -446,17 +490,6 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 - previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); - if (solrQueryResponseAllTypes != null) { - for (FacetCategory facetCategory : solrQueryResponseAllTypes.getTypeFacetCategories()) { - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - 
previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); - } - } - } setDisplayCardValues(); @@ -1020,6 +1053,14 @@ public boolean isSolrIsDown() { public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } + + public boolean isSolrOverloaded() { + return solrIsOverloaded; + } + + public void setSolrIsOverloaded(boolean solrIsOverloaded) { + this.solrIsOverloaded = solrIsOverloaded; + } public boolean isRootDv() { return rootDv; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 44976d232c2..aa2948eb8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -101,7 +101,7 @@ public class SearchServiceBean { public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, String query, List filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -122,6 +122,41 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List dataverses, + String query, + List filterQueries, + String sortField, + String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius) throws SearchException { + return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null, true, true); + } + + /** + * @param dataverseRequest + * @param dataverses + * @param query + * @param filterQueries + * @param sortField + * @param sortOrder + * @param paginationStart + * @param onlyDatatRelatedToMe + * @param numResultsPerPage + * @param retrieveEntities - look up dvobject entities with .find() (potentially expensive!) + * @param geoPoint e.g. "35,15" + * @param geoRadius e.g. "5" + * @param addFacets boolean + * @param addHighlights boolean * @return * @throws SearchException */ @@ -136,7 +171,9 @@ public SolrQueryResponse search( int numResultsPerPage, boolean retrieveEntities, String geoPoint, - String geoRadius + String geoRadius, + boolean addFacets, + boolean addHighlights ) throws SearchException { if (paginationStart < 0) { @@ -157,56 +194,62 @@ public SolrQueryResponse search( // solrQuery.setSort(sortClause); // } // solrQuery.setSort(sortClause); - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); + + List datasetFields = datasetFieldService.findAllOrderedById(); Map solrFieldsToHightlightOnMap = new HashMap<>(); - // TODO: Do not hard code "Name" etc as English here. 
- solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are not - * being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. 
+ solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - List datasetFields = datasetFieldService.findAllOrderedById(); - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); + /** + * @todo: show highlight on file card? 
+ * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } } + solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); @@ -214,6 +257,8 @@ public SolrQueryResponse search( * @todo: do we need facet.query? */ solrQuery.setParam("facet.query", "*"); + solrQuery.addFacetField(SearchFields.TYPE); // this one is always performed + for (String filterQuery : filterQueries) { solrQuery.addFilterQuery(filterQuery); } @@ -223,70 +268,73 @@ public SolrQueryResponse search( // See https://solr.apache.org/guide/8_11/spatial-search.html#bbox solrQuery.addFilterQuery("{!bbox sfield=" + SearchFields.GEOLOCATION + "}"); } + + List metadataBlockFacets = new LinkedList<>(); - // ----------------------------------- - // Facets to Retrieve - // ----------------------------------- - solrQuery.addFacetField(SearchFields.METADATA_TYPES); -// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); -// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); - solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); - solrQuery.addFacetField(SearchFields.METADATA_SOURCE); -// solrQuery.addFacetField(SearchFields.AFFILIATION); - solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); -// solrQuery.addFacetField(SearchFields.CATEGORY); -// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); -// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); -// solrQuery.addFacetField(SearchFields.KEYWORD); - /** - * @todo when a new method on datasetFieldService is available - * (retrieveFacetsByDataverse?) only show the facets that the dataverse - * in question wants to show (and in the right order): - * https://redmine.hmdc.harvard.edu/issues/3490 - * - * also, findAll only returns advancedSearchField = true... we should - * probably introduce the "isFacetable" boolean rather than caring about - * if advancedSearchField is true or false - * - */ + if (addFacets) { + // ----------------------------------- + // Facets to Retrieve + // ----------------------------------- + solrQuery.addFacetField(SearchFields.METADATA_TYPES); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.METADATA_SOURCE); + solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the + * dataverse in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... 
we + * should probably introduce the "isFacetable" boolean rather than + * caring about if advancedSearchField is true or false + * + */ - List metadataBlockFacets = new LinkedList<>(); + if (dataverses != null) { + for (Dataverse dataverse : dataverses) { + if (dataverse != null) { + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + // Get all metadata block facets configured to be displayed + metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); + } + } + } + + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last... + */ + solrQuery.addFacetField(SearchFields.FILE_TAG); + if (!systemConfig.isPublicInstall()) { + solrQuery.addFacetField(SearchFields.ACCESS); + } + } + + //I'm not sure if just adding null here is good for hte permissions system... i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { // ----------------------------------- // PERMISSION FILTER QUERY // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } - if (dataverse != null) { - for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { - DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); - solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); - } - // Get all metadata block facets configured to be displayed - metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); - } } } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } } - solrQuery.addFacetField(SearchFields.FILE_TYPE); - /** - * @todo: hide the extra line this shows in the GUI... at least it's - * last... - */ - solrQuery.addFacetField(SearchFields.TYPE); - solrQuery.addFacetField(SearchFields.FILE_TAG); - if (!systemConfig.isPublicInstall()) { - solrQuery.addFacetField(SearchFields.ACCESS); - } + /** * @todo: do sanity checking... throw error if negative */ @@ -416,34 +464,44 @@ public SolrQueryResponse search( Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); List matchedFields = new ArrayList<>(); - List highlights = new ArrayList<>(); - Map highlightsMap = new HashMap<>(); - Map> highlightsMap2 = new HashMap<>(); - Map highlightsMap3 = new HashMap<>(); - if (queryResponse.getHighlighting().get(id) != null) { - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String field = entry.getKey(); - String displayName = entry.getValue(); - - List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); - if (highlightSnippets != null) { - matchedFields.add(field); - /** - * @todo only SolrField.SolrType.STRING? that's not - * right... 
knit the SolrField object more into the - * highlighting stuff - */ - SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); - Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); - highlights.add(highlight); - highlightsMap.put(solrField, highlight); - highlightsMap2.put(solrField, highlightSnippets); - highlightsMap3.put(field, highlight); + + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + if (addHighlights) { + List highlights = new ArrayList<>(); + Map highlightsMap = new HashMap<>(); + Map> highlightsMap2 = new HashMap<>(); + Map highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } } + } + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); } - SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + /** * @todo put all this in the constructor? */ @@ -470,9 +528,7 @@ public SolrQueryResponse search( solrSearchResult.setNameSort(nameSort); solrSearchResult.setReleaseOrCreateDate(release_or_create_date); solrSearchResult.setMatchedFields(matchedFields); - solrSearchResult.setHighlightsAsList(highlights); - solrSearchResult.setHighlightsMap(highlightsMap); - solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map parent = new HashMap<>(); String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); solrSearchResult.setDescriptionNoSnippet(description); @@ -863,7 +919,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -922,9 +978,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ AuthenticatedUser au = (AuthenticatedUser) user; - // Logged in user, has publication status facet - // - solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + if (addFacets) { + // Logged in user, has publication status facet + // + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + } // ---------------------------------------------------- // (3) Is this a Super User? 
From 6307292d3858bd62144e313de1b5574b55b4fb36 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 13:07:14 -0400 Subject: [PATCH 063/546] more fixes/cleanup #9635 --- .../search/SearchIncludeFragment.java | 27 ++-- .../dataverse/search/SearchServiceBean.java | 127 ++++++++++-------- 2 files changed, 90 insertions(+), 64 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1e42958fe4e..958ac0151c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -308,15 +308,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } + filterQueriesFinal.addAll(filterQueries); + + /** + * Add type queries, for the types (Dataverses, Datasets, Datafiles) + * currently selected: + */ selectedTypesList = new ArrayList<>(); String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - - - filterQueriesFinal.addAll(filterQueries); - + logger.info("selected types list size: "+selectedTypesList.size()); + String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); + selectedTypesHumanReadable = combine(arr, " OR "); + if (!selectedTypesHumanReadable.isEmpty()) { + typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; + } filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -383,7 +391,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run another query to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueriesFinal); + filterQueriesFinalSecondPass.addAll(filterQueries); List selectedTypesListSecondPass = new ArrayList<>(); @@ -393,12 +401,13 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - String[] arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); + arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + if (solrQueryResponseSecondPass != null) { - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); if (solrQueryResponseSecondPass.hasError()) { logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); @@ -410,6 +419,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } + } else { + logger.warning("null solr response from the second pass type query"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index aa2948eb8cb..d3ff7e42d15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -195,64 +195,11 @@ public SolrQueryResponse search( // } // solrQuery.setSort(sortClause); - List datasetFields = datasetFieldService.findAllOrderedById(); - Map solrFieldsToHightlightOnMap = new HashMap<>(); - if (addHighlights) { - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); - - // TODO: Do not hard code "Name" etc as English here. - solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are - * not being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); - } - } solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); + /** * @todo: do we need facet.query? 
*/ @@ -315,7 +262,61 @@ public SolrQueryResponse search( } } - + List datasetFields = datasetFieldService.findAllOrderedById(); + Map solrFieldsToHightlightOnMap = new HashMap<>(); + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? + * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + } + //I'm not sure if just adding null here is good for hte permissions system... 
i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { @@ -370,7 +371,7 @@ public SolrQueryResponse search( // solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); // solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); solrQuery.setRows(numResultsPerPage); - logger.fine("Solr query:" + solrQuery); + logger.info("Solr query:" + solrQuery); // ----------------------------------- // Make the solr query @@ -378,8 +379,12 @@ public SolrQueryResponse search( QueryResponse queryResponse = null; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); + } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); + + logger.info("message from solr exception: "+messageFromSolr); + String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -393,6 +398,12 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error" - could be anything else too - ? + + + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); @@ -408,6 +419,10 @@ public SolrQueryResponse search( } catch (SolrServerException | IOException ex) { throw new SearchException("Internal Dataverse Search Engine Error", ex); } + + int statusCode = queryResponse.getStatus(); + + logger.info("status code of the query response: "+statusCode); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); From 74eb7c551d209c9e460cbaea5572004b0fcad0bc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 16:09:32 -0400 Subject: [PATCH 064/546] more fixes (#9635) --- .../search/SearchIncludeFragment.java | 24 +++++++++++++++---- .../dataverse/search/SearchServiceBean.java | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 958ac0151c6..177186fce49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -395,9 +395,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused List selectedTypesListSecondPass = new ArrayList<>(); - for (String dvObjectType : previewCountbyType.keySet()) { - if (previewCountbyType.get(dvObjectType) == -1) { - selectedTypesListSecondPass.add(dvObjectType); + // @todo: simplify this! 
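[Editor's sketch, not part of this patch.] One way the @todo above could go is a one-time reverse map from the localized facet labels back to the canonical type names, used by the loop that follows; this assumes the same three bundle keys the loop compares against and requires java.util.Map.

    // Illustrative simplification only (not committed code): build the
    // label-to-type lookup once, then translate in a single pass.
    Map<String, String> labelToType = Map.of(
            BundleUtil.getStringFromBundle("dataverses"), "dataverses",
            BundleUtil.getStringFromBundle("datasets"), "datasets",
            BundleUtil.getStringFromBundle("files"), "files");

    for (String label : previewCountbyType.keySet()) {
        if (previewCountbyType.get(label) == -1 && labelToType.containsKey(label)) {
            selectedTypesListSecondPass.add(labelToType.get(label));
        }
    }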
+ for (String dvObjectTypeLabel : previewCountbyType.keySet()) { + if (previewCountbyType.get(dvObjectTypeLabel) == -1) { + String dvObjectType = null; + + if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("dataverses"))) { + dvObjectType = "dataverses"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("datasets"))) { + dvObjectType = "datasets"; + } else if (dvObjectTypeLabel.equals(BundleUtil.getStringFromBundle("files"))) { + dvObjectType = "files"; + } + + if (dvObjectType != null) { + logger.info("adding object type to the second pass query: "+dvObjectType); + selectedTypesListSecondPass.add(dvObjectType); + } } } @@ -409,13 +423,15 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused if (solrQueryResponseSecondPass != null) { if (solrQueryResponseSecondPass.hasError()) { - logger.info(solrQueryResponse.getError()); + logger.info(solrQueryResponseSecondPass.getError()); setSolrErrorEncountered(true); } // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index d3ff7e42d15..18cdbaa6994 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -423,6 +423,7 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); + ///logger.info("number of hits: "+queryResponse._size()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -823,6 +824,7 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { + logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From f1e37ae0ff01e1fe0030202be1883f823bb8d080 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 17 Oct 2023 17:26:54 -0400 Subject: [PATCH 065/546] finally working as it should; much simplified/way less expensive second pass query sent in order to populate the unchecked type count facets. 
(#9635) --- .../iq/dataverse/search/SearchIncludeFragment.java | 4 +--- .../iq/dataverse/search/SearchServiceBean.java | 13 +++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 177186fce49..47a5621c3d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -418,7 +418,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused arr = selectedTypesListSecondPass.toArray(new String[selectedTypesListSecondPass.size()]); filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, false, false); + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); if (solrQueryResponseSecondPass != null) { @@ -429,9 +429,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // And now populate the remaining type facets: for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { - logger.info("facet category: "+facetCategory.getName()); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - logger.info("facet label: "+facetLabel.getName()); previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 18cdbaa6994..be3330080c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -165,7 +165,8 @@ public SolrQueryResponse search( List dataverses, String query, List filterQueries, - String sortField, String sortOrder, + String sortField, + String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, @@ -189,7 +190,11 @@ public SolrQueryResponse search( // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); // if (query.equals("*") || query.equals("*:*")) { // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); - solrQuery.setSort(new SortClause(sortField, sortOrder)); + if (sortField != null) { + // is it ok not to specify any sort? - there are cases where we + // don't care, and it must cost some extra cycles -- L.A. 
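[Editor's sketch, for context on the null-sort change just above.] The second-pass query only needs the per-type facet counts, so it can skip sorting and highlighting and request few or no result rows. In plain SolrJ, outside of Dataverse and with an illustrative field name, such a counts-only query looks roughly like this:

    import java.io.IOException;
    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.response.FacetField;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class TypeCountSketch {
        // Print only the facet counts for the object-type field.
        // "dvObjectType" is an assumed field name for illustration.
        public static void printTypeCounts(SolrClient client, String queryString)
                throws SolrServerException, IOException {
            SolrQuery countsOnly = new SolrQuery(queryString);
            countsOnly.setRows(0);            // no documents needed, so no sort either
            countsOnly.setFacet(true);
            countsOnly.addFacetField("dvObjectType");
            countsOnly.addFilterQuery("dvObjectType:(dataverses OR datasets OR files)");
            QueryResponse response = client.query(countsOnly);
            for (FacetField.Count count : response.getFacetField("dvObjectType").getValues()) {
                System.out.println(count.getName() + ": " + count.getCount());
            }
        }
    }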
+ solrQuery.setSort(new SortClause(sortField, sortOrder)); + } // } else { // solrQuery.setSort(sortClause); // } @@ -423,7 +428,8 @@ public SolrQueryResponse search( int statusCode = queryResponse.getStatus(); logger.info("status code of the query response: "+statusCode); - ///logger.info("number of hits: "+queryResponse._size()); + logger.info("_size from query response: "+queryResponse._size()); + logger.info("qtime: "+queryResponse.getQTime()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -824,7 +830,6 @@ public SolrQueryResponse search( facetCategory.setFacetLabel(facetLabelList); if (!facetLabelList.isEmpty()) { if (facetCategory.getName().equals(SearchFields.TYPE)) { - logger.info("type facet encountered"); // the "type" facet is special, these are not typeFacetCategories.add(facetCategory); } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { From c1a19299e547fbc47322dafde74bc75d2e138d9c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 18 Oct 2023 13:48:47 -0400 Subject: [PATCH 066/546] a stub for interecepting a "circuit breaker" 503 from the server (#9635) --- .../dataverse/search/SearchServiceBean.java | 31 ++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index be3330080c4..1b92c2a4a46 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,13 +382,35 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; + boolean solrTemporarilyUnavailable = false; + try { queryResponse = solrClientService.getSolrClient().query(solrQuery); } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); - logger.info("message from solr exception: "+messageFromSolr); + logger.info("message from tye solr exception: "+messageFromSolr); + logger.info("code from the solr exception: "+ex.code()); + + if (queryResponse != null) { + logger.info("return code: "+queryResponse.getStatus()); + } + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error", as the code below implies - could be + // something else too - ? + + // Specifically, we now rely on the Solr "circuit breaker" mechanism + // to start dropping requests with 503, when the service is + // overwhelmed with requests load (with the assumption that this is + // a transient condition): + + if (ex.code() == 503) { + solrTemporarilyUnavailable = true; + // actual logic for communicating this state back to the local + // client code TBD (@todo) + } String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; @@ -403,12 +425,7 @@ public SolrQueryResponse search( exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error - - // We probably shouldn't be assuming that this is necessarily a - // "search syntax error" - could be anything else too - ? 
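[Editor's sketch.] The @todo above leaves open how the 503 "temporarily unavailable" state reaches the calling page. One hypothetical shape, shown only for illustration, is a flag on the response that the caller checks before treating the outcome as an error; the accessor and setter names below are invented, not existing Dataverse methods.

    // Hypothetical caller-side handling; isSolrTemporarilyUnavailable() and
    // setSearchTemporarilyUnavailable() are invented names for this sketch.
    if (solrQueryResponse.isSolrTemporarilyUnavailable()) {
        // transient overload (Solr circuit breaker returned 503):
        // let the UI show a "search is busy, please retry shortly" notice
        setSearchTemporarilyUnavailable(true);
    } else if (solrQueryResponse.hasError()) {
        // genuine query problem, e.g. a syntax error
        setSolrErrorEncountered(true);
    }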
- - - + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); From ecbb020ed7da390c378fb76f08c9c5fb72677189 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:12 -0400 Subject: [PATCH 067/546] add/standardize retrieveSizeFromMedia call --- .../iq/dataverse/dataaccess/FileAccessIO.java | 33 +- .../dataverse/dataaccess/InputStreamIO.java | 5 + .../dataaccess/RemoteOverlayAccessIO.java | 14 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 21 +- .../iq/dataverse/dataaccess/StorageIO.java | 379 +++++++++--------- .../dataverse/dataaccess/SwiftAccessIO.java | 5 + 6 files changed, 241 insertions(+), 216 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d95df1567bd..3e6c802c526 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.function.Predicate; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; // Dataverse imports: @@ -115,7 +113,7 @@ public void open (DataAccessOption... options) throws IOException { this.setInputStream(fin); setChannel(fin.getChannel()); - this.setSize(getLocalFileSize()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -506,21 +504,6 @@ public void delete() throws IOException { // Auxilary helper methods, filesystem access-specific: - private long getLocalFileSize () { - long fileSize = -1; - - try { - File testFile = getFileSystemPath().toFile(); - if (testFile != null) { - fileSize = testFile.length(); - } - return fileSize; - } catch (IOException ex) { - return -1; - } - - } - public FileInputStream openLocalFileAsInputStream () { FileInputStream in; @@ -742,4 +725,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE return toDelete; } + @Override + public long retrieveSizeFromMedia() { + long fileSize = -1; + try { + File testFile = getFileSystemPath().toFile(); + if (testFile != null) { + fileSize = testFile.length(); + } + return fileSize; + } catch (IOException ex) { + return -1; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index be6f9df0254..de392b74cca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } + @Override + public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index a9653f2ab68..9c1f5ba23aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -64,8 +64,6 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - - String globusAccessToken = null; protected StorageIO baseStore = null; protected String path = null; @@ -155,7 +153,7 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(retrieveSize()); + this.setSize(retrieveSizeFromMedia()); } if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { @@ -183,7 +181,8 @@ public void open(DataAccessOption... options) throws IOException { } } - long retrieveSize() { + @Override + public long retrieveSizeFromMedia() { long size = -1; HttpHead head = new HttpHead(baseUrl + "/" + path); try { @@ -383,7 +382,7 @@ public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { @Override public boolean exists() { logger.fine("Exists called"); - return (retrieveSize() != -1); + return (retrieveSizeFromMedia() != -1); } @Override @@ -502,8 +501,9 @@ protected void configureStores(DataAccessRequest req, String driverId, String st if (index > 0) { storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); } - // THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; switch (baseDriverType) { case DataAccess.S3: diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 822ada0b83e..b0f9f0ffb05 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -207,14 +207,7 @@ public void open(DataAccessOption... 
options) throws IOException { if (isReadAccess) { - key = getMainFileKey(); - ObjectMetadata objectMetadata = null; - try { - objectMetadata = s3.getObjectMetadata(bucketName, key); - } catch (SdkClientException sce) { - throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")"); - } - this.setSize(objectMetadata.getContentLength()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") @@ -1385,4 +1378,16 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + key = getMainFileKey(); + ObjectMetadata objectMetadata = null; + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + } catch (SdkClientException sce) { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + return objectMetadata.getContentLength(); + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 3bc83538679..f3c2ef5f513 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -20,7 +20,6 @@ package edu.harvard.iq.dataverse.dataaccess; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; @@ -43,7 +42,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; - /** * * @author Leonid Andreev @@ -55,15 +53,15 @@ public abstract class StorageIO { public StorageIO() { } - + public StorageIO(String storageLocation, String driverId) { - this.driverId=driverId; + this.driverId = driverId; } public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; - this.driverId=driverId; + this.driverId = driverId; if (this.req == null) { this.req = new DataAccessRequest(); } @@ -72,18 +70,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { } } - - // Abstract methods to be implemented by the storage drivers: public abstract void open(DataAccessOption... option) throws IOException; protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. - //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + // A public store is one in which files may be accessible outside Dataverse and + // therefore accessible without regard to Dataverse's access controls related to + // restriction and embargoes. + // Currently, this is just used to warn users at upload time rather than disable + // restriction/embargo. static protected Map driverPublicAccessMap = new HashMap(); - + public boolean canRead() { return isReadAccess; } @@ -94,115 +93,118 @@ public boolean canWrite() { public abstract String getStorageLocation() throws IOException; - // This method will return a Path, if the storage method is a - // local filesystem. Otherwise should throw an IOException. + // This method will return a Path, if the storage method is a + // local filesystem. Otherwise should throw an IOException. 
public abstract Path getFileSystemPath() throws IOException; - - public abstract boolean exists() throws IOException; - + + public abstract boolean exists() throws IOException; + public abstract void delete() throws IOException; - + // this method for copies a local Path (for ex., a // temp file, into this DataAccess location): public abstract void savePath(Path fileSystemPath) throws IOException; - + // same, for an InputStream: /** - * This method copies a local InputStream into this DataAccess location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess location. Note + * that the S3 driver implementation of this abstract method is problematic, + * because S3 cannot save an object of an unknown length. This effectively + * nullifies any benefits of streaming; as we cannot start saving until we have + * read the entire stream. One way of solving this would be to buffer the entire + * stream as byte[], in memory, then save it... Which of course would be limited + * by the amount of memory available, and thus would not work for streams larger + * than that. So we have eventually decided to save save the stream to a temp + * file, then save to S3. This is slower, but guaranteed to work on any size + * stream. An alternative we may want to consider is to not implement this + * method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ + */ public abstract void saveInputStream(InputStream inputStream) throws IOException; + public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException; - + // Auxiliary File Management: (new as of 4.0.2!) - + // An "auxiliary object" is an abstraction of the traditional DVN/Dataverse - // mechanism of storing extra files related to the man StudyFile/DataFile - - // such as "saved original" and cached format conversions for tabular files, - // thumbnails for images, etc. - in physical files with the same file - // name but various reserved extensions. 
- - //This function retrieves auxiliary files related to datasets, and returns them as inputstream - public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ; - + // mechanism of storing extra files related to the man StudyFile/DataFile - + // such as "saved original" and cached format conversions for tabular files, + // thumbnails for images, etc. - in physical files with the same file + // name but various reserved extensions. + + // This function retrieves auxiliary files related to datasets, and returns them + // as inputstream + public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException; + public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException; - - public abstract long getAuxObjectSize(String auxItemTag) throws IOException; - - public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; - - public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; - - public abstract void backupAsAux(String auxItemTag) throws IOException; - - public abstract void revertBackupAsAux(String auxItemTag) throws IOException; - - // this method copies a local filesystem Path into this DataAccess Auxiliary location: + + public abstract long getAuxObjectSize(String auxItemTag) throws IOException; + + public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; + + public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; + + public abstract void backupAsAux(String auxItemTag) throws IOException; + + public abstract void revertBackupAsAux(String auxItemTag) throws IOException; + + // this method copies a local filesystem Path into this DataAccess Auxiliary + // location: public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException; - + /** - * This method copies a local InputStream into this DataAccess Auxiliary location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess Auxiliary + * location. Note that the S3 driver implementation of this abstract method is + * problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. 
So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. (Not an issue in either + * FileAccessIO or SwiftAccessIO implementations) * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. - */ - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; - public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException; - - public abstract ListlistAuxObjects() throws IOException; - - public abstract void deleteAuxObject(String auxItemTag) throws IOException; - + */ + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; + + public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) + throws IOException; + + public abstract List listAuxObjects() throws IOException; + + public abstract void deleteAuxObject(String auxItemTag) throws IOException; + public abstract void deleteAllAuxObjects() throws IOException; private DataAccessRequest req; private InputStream in = null; - private OutputStream out; + private OutputStream out; protected Channel channel; protected DvObject dvObject; protected String driverId; - /*private int status;*/ + /* private int status; */ private long size; /** - * Where in the file to seek to when reading (default is zero bytes, the - * start of the file). + * Where in the file to seek to when reading (default is zero bytes, the start + * of the file). 
*/ private long offset; - + private String mimeType; private String fileName; private String varHeader; @@ -215,8 +217,8 @@ public boolean canWrite() { private String swiftContainerName; private boolean isLocalFile = false; - /*private boolean isRemoteAccess = false;*/ - /*private boolean isHttpAccess = false;*/ + /* private boolean isRemoteAccess = false; */ + /* private boolean isHttpAccess = false; */ private boolean noVarHeader = false; // For remote downloads: @@ -229,13 +231,14 @@ public boolean canWrite() { private String remoteUrl; protected String remoteStoreName = null; protected URL remoteStoreUrl = null; - + // For HTTP-based downloads: - /*private GetMethod method = null; - private Header[] responseHeaders;*/ + /* + * private GetMethod method = null; private Header[] responseHeaders; + */ // getters: - + public Channel getChannel() throws IOException { return channel; } @@ -255,16 +258,15 @@ public ReadableByteChannel getReadChannel() throws IOException { return (ReadableByteChannel) channel; } - - public DvObject getDvObject() - { + + public DvObject getDvObject() { return dvObject; } - + public DataFile getDataFile() { return (DataFile) dvObject; } - + public Dataset getDataset() { return (Dataset) dvObject; } @@ -277,9 +279,9 @@ public DataAccessRequest getRequest() { return req; } - /*public int getStatus() { - return status; - }*/ + /* + * public int getStatus() { return status; } + */ public long getSize() { return size; @@ -292,9 +294,9 @@ public long getOffset() { public InputStream getInputStream() throws IOException { return in; } - + public OutputStream getOutputStream() throws IOException { - return out; + return out; } public String getMimeType() { @@ -317,23 +319,23 @@ public String getRemoteUrl() { return remoteUrl; } - public String getTemporarySwiftUrl(){ + public String getTemporarySwiftUrl() { return temporarySwiftUrl; } - + public String getTempUrlExpiry() { return tempUrlExpiry; } - + public String getTempUrlSignature() { return tempUrlSignature; } - + public String getSwiftFileName() { return swiftFileName; } - public String getSwiftContainerName(){ + public String getSwiftContainerName() { return swiftContainerName; } @@ -344,34 +346,32 @@ public String getRemoteStoreName() { public URL getRemoteStoreUrl() { return remoteStoreUrl; } - - /*public GetMethod getHTTPMethod() { - return method; - } - public Header[] getResponseHeaders() { - return responseHeaders; - }*/ + /* + * public GetMethod getHTTPMethod() { return method; } + * + * public Header[] getResponseHeaders() { return responseHeaders; } + */ public boolean isLocalFile() { return isLocalFile; } - - // "Direct Access" StorageIO is used to access a physical storage - // location not associated with any dvObject. (For example, when we - // are deleting a physical file left behind by a DataFile that's - // already been deleted from the database). + + // "Direct Access" StorageIO is used to access a physical storage + // location not associated with any dvObject. (For example, when we + // are deleting a physical file left behind by a DataFile that's + // already been deleted from the database). 
public boolean isDirectAccess() { - return dvObject == null; + return dvObject == null; } - /*public boolean isRemoteAccess() { - return isRemoteAccess; - }*/ + /* + * public boolean isRemoteAccess() { return isRemoteAccess; } + */ - /*public boolean isHttpAccess() { - return isHttpAccess; - }*/ + /* + * public boolean isHttpAccess() { return isHttpAccess; } + */ public boolean isDownloadSupported() { return isDownloadSupported; @@ -398,9 +398,9 @@ public void setRequest(DataAccessRequest dar) { req = dar; } - /*public void setStatus(int s) { - status = s; - }*/ + /* + * public void setStatus(int s) { status = s; } + */ public void setSize(long s) { size = s; @@ -421,11 +421,11 @@ public void setOffset(long offset) throws IOException { public void setInputStream(InputStream is) { in = is; } - + public void setOutputStream(OutputStream os) { - out = os; - } - + out = os; + } + public void setChannel(Channel c) { channel = c; } @@ -450,45 +450,46 @@ public void setRemoteUrl(String u) { remoteUrl = u; } - public void setTemporarySwiftUrl(String u){ + public void setTemporarySwiftUrl(String u) { temporarySwiftUrl = u; } - - public void setTempUrlExpiry(Long u){ + + public void setTempUrlExpiry(Long u) { tempUrlExpiry = String.valueOf(u); } - + public void setSwiftFileName(String u) { swiftFileName = u; } - - public void setTempUrlSignature(String u){ + + public void setTempUrlSignature(String u) { tempUrlSignature = u; } - public void setSwiftContainerName(String u){ + public void setSwiftContainerName(String u) { swiftContainerName = u; } - /*public void setHTTPMethod(GetMethod hm) { - method = hm; - }*/ + /* + * public void setHTTPMethod(GetMethod hm) { method = hm; } + */ - /*public void setResponseHeaders(Header[] headers) { - responseHeaders = headers; - }*/ + /* + * public void setResponseHeaders(Header[] headers) { responseHeaders = headers; + * } + */ public void setIsLocalFile(boolean f) { isLocalFile = f; } - /*public void setIsRemoteAccess(boolean r) { - isRemoteAccess = r; - }*/ + /* + * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; } + */ - /*public void setIsHttpAccess(boolean h) { - isHttpAccess = h; - }*/ + /* + * public void setIsHttpAccess(boolean h) { isHttpAccess = h; } + */ public void setIsDownloadSupported(boolean d) { isDownloadSupported = d; @@ -506,12 +507,11 @@ public void setNoVarHeader(boolean nvh) { noVarHeader = nvh; } - // connection management methods: - /*public void releaseConnection() { - if (method != null) { - method.releaseConnection(); - } - }*/ + // connection management methods: + /* + * public void releaseConnection() { if (method != null) { + * method.releaseConnection(); } } + */ public void closeInputStream() { if (in != null) { @@ -528,7 +528,7 @@ public void closeInputStream() { } } } - + public String generateVariableHeader(List dvs) { String varHeader = null; @@ -571,14 +571,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE return false; } - public boolean isBelowIngestSizeLimit() { - long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1")); - if(limit>0 && getSize()>limit) { - return false; - } else { - return true; - } - } + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(System.getProperty("dataverse.files." 
+ this.driverId + ".ingestsizelimit", "-1")); + if (limit > 0 && getSize() > limit) { + return false; + } else { + return true; + } + } public boolean downloadRedirectEnabled() { return false; @@ -587,36 +587,38 @@ public boolean downloadRedirectEnabled() { public boolean downloadRedirectEnabled(String auxObjectTag) { return false; } - - public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } - public static boolean isPublicStore(String driverId) { - //Read once and cache - if(!driverPublicAccessMap.containsKey(driverId)) { - driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + // Read once and cache + if (!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, + Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); } return driverPublicAccessMap.get(driverId); } - + public static String getDriverPrefix(String driverId) { - return driverId+ DataAccess.SEPARATOR; + return driverId + DataAccess.SEPARATOR; } - + public static boolean isDirectUploadEnabled(String driverId) { - return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || - Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); + return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) + && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) + || Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); } - - //Check that storageIdentifier is consistent with store's config - //False will prevent direct uploads + + // Check that storageIdentifier is consistent with store's config + // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { return false; } - - //Utility to verify the standard UUID pattern for stored files. + + // Utility to verify the standard UUID pattern for stored files. protected static boolean usesStandardNamePattern(String identifier) { Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$"); @@ -626,4 +628,15 @@ protected static boolean usesStandardNamePattern(String identifier) { public abstract List cleanUp(Predicate filter, boolean dryRun) throws IOException; + /** + * A storage-type-specific mechanism for retrieving the size of a file. Intended + * primarily as a way to get the size before it has been recorded in the + * database, e.g. during direct/out-of-band transfers but could be useful to + * check the db values. 
+ * + * @return file size in bytes + * @throws IOException + */ + public abstract long retrieveSizeFromMedia() throws IOException; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 6c84009de3e..0d1dab581fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -954,4 +954,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } } From 68ab3f3cb6399d4c73bff0bcc84d9687ab369351 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:18:58 -0400 Subject: [PATCH 068/546] typos, change hash notice --- .../iq/dataverse/globus/GlobusServiceBean.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 49572519696..8aa9915db58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -4,8 +4,6 @@ import com.github.benmanes.caffeine.cache.Caffeine; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; -import com.nimbusds.oauth2.sdk.pkce.CodeVerifier; - import edu.harvard.iq.dataverse.*; import jakarta.ejb.Asynchronous; @@ -21,7 +19,6 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; -import jakarta.json.JsonValue; import jakarta.servlet.http.HttpServletRequest; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -662,6 +659,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + if(ruleId!=null) { Long datasetId = rulesCache.getIfPresent(ruleId); if(datasetId!=null) { @@ -1095,7 +1093,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // ToDo: what if the file doesnot exists in s3 + // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed do { @@ -1107,8 +1105,8 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } catch (IOException ioex) { count = 3; logger.info(ioex.getMessage()); - globusLogger.info("DataFile (fullPAth " + fullPath - + ") does not appear to be accessible withing Dataverse: "); + globusLogger.info("DataFile (fullPath " + fullPath + + ") does not appear to be accessible within Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); @@ -1119,7 +1117,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } while (count < 3); if (checksumVal.length() == 0) { - checksumVal = "NULL"; + checksumVal = "Not available in Dataverse"; } String mimeType = calculatemime(fileName); @@ -1384,4 +1382,5 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { private static boolean isDataverseManaged(String driverId) { return Boolean.getBoolean("dataverse.files." 
+ driverId + ".managed"); } + } From d57b9f048490bcc2a38d8c2fc422e3797bad2fbc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:24 -0400 Subject: [PATCH 069/546] add getLocationFromStorage, add tests --- .../iq/dataverse/dataaccess/DataAccess.java | 34 +++++++++++++++---- .../dataverse/dataaccess/DataAccessTest.java | 20 +++++++++++ 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 8387f8110cf..a3345cb7a8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -153,12 +153,34 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains(SEPARATOR)) { - //It's a full location with a driverId, so strip and reapply the driver id - //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); - } - return location.substring(location.lastIndexOf('/')+1); + if (location.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return location.substring(0, location.indexOf(SEPARATOR) + 3) + + location.substring(location.lastIndexOf('/') + 1); + } + return location.substring(location.lastIndexOf('/') + 1); + } + + /** Changes storageidentifiers of the form + * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * and + * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * @param id + * @param dataset + * @return + */ + public static String getLocationFromStorageId(String id, Dataset dataset) { + String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + if (id.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return id.substring(0, id.indexOf(SEPARATOR) + 3) + path + + id.substring(id.lastIndexOf('/') + 1); + } + return path + id.substring(id.lastIndexOf('/') + 1); } public static String getDriverType(String driverId) { diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java index 1ff914adff9..f7ce061fb24 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java @@ -59,4 +59,24 @@ void testCreateNewStorageIO_createsFileAccessIObyDefault() throws IOException { StorageIO storageIo = DataAccess.createNewStorageIO(dataset, "valid-tag"); assertTrue(storageIo.getClass().equals(FileAccessIO.class)); } + + @Test + void testGetLocationFromStorageId() { + Dataset d = new Dataset(); + d.setAuthority("10.5072"); + d.setIdentifier("FK2/ABCDEF"); + assertEquals("s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("s3://18b39722140-50eb7d3c5ece", 
d)); + assertEquals("10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("18b39722140-50eb7d3c5ece", d)); + + } + + @Test + void testGetStorageIdFromLocation() { + assertEquals("file://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("file://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + assertEquals("s3://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("s3://bucketname:10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + } } From 34286830d1cfa4849a82909eaff20528980fd717 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:19:53 -0400 Subject: [PATCH 070/546] get size for direct uploads --- .../impl/CreateNewDataFilesCommand.java | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index ac701da1be9..a8be1bd5116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,18 +3,20 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -83,7 +85,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; + try { + sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); + + // get file size + // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call + // to get the size immediately after uploading may fail. As of the addition of + // PR#9409 adding storage quotas, we are now requiring size to be available + // earlier. 
If this is seen, adding + // a delay/retry may help + newFileSize = sio.retrieveSizeFromMedia(); + } catch (IOException e) { + // If we don't get a file size, a CommandExecutionException will be thrown later in the code + e.printStackTrace(); + } + } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From 2adfa8af01124c31ada3f1801dd5f3dac0fd704e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 09:20:31 -0400 Subject: [PATCH 071/546] refactor, add delete method, etc. --- .../dataaccess/GlobusOverlayAccessIO.java | 157 ++++++++++++------ 1 file changed, 110 insertions(+), 47 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 965dc3c0947..011bb74f720 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -17,11 +17,14 @@ import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; import org.apache.http.util.EntityUtils; +import jakarta.json.Json; import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; /** * @author qqmyers @@ -43,7 +46,6 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); - String globusAccessToken = null; /* * If this is set to true, the store supports Globus transfer in and * Dataverse/the globus app manage file locations, access controls, deletion, @@ -51,35 +53,64 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce */ private boolean dataverseManaged = false; + private String relativeDirectoryPath; + + private String endpointPath; + + private String filename; + + private String endpoint; + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - if (dvObject instanceof DataFile) { - globusAccessToken = retrieveGlobusAccessToken(); - } dataverseManaged = isDataverseManaged(this.driverId); + } + + private void parsePath() { + int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.info("endpointWithBasePath: " + endpointWithBasePath); + endpointPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); + logger.info("endpointPath: " + endpointPath); + + if (dataverseManaged && (dvObject!=null)) { + + Dataset ds = null; + if (dvObject instanceof Dataset) { + ds = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + ds = ((DataFile) dvObject).getOwner(); + } + relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + } else { + relativeDirectoryPath = ""; + } + if (filenameStart > 0) { + relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); + } + logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + filename = path.substring(filenameStart); + endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; - logger.info("GAT3: " + globusAccessToken); + } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureStores(null, driverId, storageLocation); this.dataverseManaged = isDataverseManaged(this.driverId); if (dataverseManaged) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); path = parts[1]; } else { this.setIsLocalFile(false); - configureStores(null, driverId, storageLocation); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); validatePath(path); logger.fine("Relative path: " + path); } -//ToDo - only when needed? - globusAccessToken = retrieveGlobusAccessToken(); - } - + private String retrieveGlobusAccessToken() { // String globusToken = JvmSettings.GLOBUS_TOKEN.lookup(driverId); String globusToken = System.getProperty("dataverse.files." + this.driverId + ".globus-token"); @@ -101,33 +132,16 @@ private void validatePath(String relPath) throws IOException { // Call the Globus API to get the file size @Override - long retrieveSize() { + public long retrieveSizeFromMedia() { + parsePath(); + String globusAccessToken = retrieveGlobusAccessToken(); logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { - int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf("://") + 3); - int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); - String directoryPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("directoryPath: " + directoryPath); - - if (dataverseManaged && (dvObject!=null)) { - Dataset ds = ((DataFile) dvObject).getOwner(); - directoryPath = directoryPath + "/" + ds.getAuthority() + "/" + ds.getIdentifier(); - logger.info("directoryPath now: " + directoryPath); - - } - if (filenameStart > 0) { - directoryPath = directoryPath + path.substring(0, filenameStart); - } - logger.info("directoryPath finally: " + directoryPath); - String filename = path.substring(filenameStart); - String endpoint = pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint - + "/ls?path=" + directoryPath + "&filter=name:" + filename); + + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); logger.info("Token is " + globusAccessToken); @@ -166,25 +180,63 @@ public InputStream getInputStream() throws IOException { @Override public void delete() throws IOException { -// Fix - // Delete is best-effort - we tell the remote server and it may or may not - // implement this call + parsePath(); + // Delete is best-effort - we tell the endpoint to delete don't monitor whether + // it succeeds if (!isDirectAccess()) { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } + String globusAccessToken = retrieveGlobusAccessToken(); + // Construct Globus URL + URI absoluteURI = null; try { - HttpDelete del = new HttpDelete(baseUrl + "/" + path); - CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); - try { - int code = response.getStatusLine().getStatusCode(); - switch (code) { + + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); + HttpGet get = new HttpGet(absoluteURI); + + logger.info("Token is " + globusAccessToken); + get.addHeader("Authorization", "Bearer " + globusAccessToken); + CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); + if (response.getStatusLine().getStatusCode() == 200) { + // Get reponse as string + String responseString = EntityUtils.toString(response.getEntity()); + logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + JsonObject responseJson = JsonUtil.getJsonObject(responseString); + String submissionId = responseJson.getString("value"); + logger.info("submission_id for delete is: " + submissionId); + absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/delete"); + HttpPost post = new HttpPost(absoluteURI); + JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); + taskJsonBuilder.add("submission_id", submissionId).add("DATA_TYPE", "delete").add("endpoint", endpoint) + .add("DATA", Json.createArrayBuilder().add(Json.createObjectBuilder().add("DATA_TYPE", "delete_item").add("path", + endpointPath + relativeDirectoryPath + "/" + filename))); + post.setHeader("Content-Type", "application/json"); + post.addHeader("Authorization", "Bearer " + globusAccessToken); + String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); + logger.info("Sending: " + taskJson); + post.setEntity(new StringEntity(taskJson, "utf-8")); + CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); + int statusCode=postResponse.getStatusLine().getStatusCode(); + logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + switch (statusCode) { + case 202: + // ~Success - delete task was accepted + logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + break; case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + path); + // Duplicate - delete task was already accepted + logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + break; default: - logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); + logger.warning("Response from " + post.getURI().toString() + " was " + + 
postResponse.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(postResponse.getEntity())); } - } finally { - EntityUtils.consume(response.getEntity()); + + } else { + logger.warning("Response from " + get.getURI().toString() + " was " + + response.getStatusLine().getStatusCode()); + logger.info(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -250,6 +302,16 @@ static boolean isValidIdentifier(String driverId, String storageId) { return true; } + @Override + public String getStorageLocation() throws IOException { + parsePath(); + if (dataverseManaged) { + return this.driverId + DataAccess.SEPARATOR + relativeDirectoryPath + "/" + filename; + } else { + return super.getStorageLocation(); + } + } + public static void main(String[] args) { System.out.println("Running the main method"); if (args.length > 0) { @@ -272,7 +334,7 @@ public static void main(String[] args) { try { GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSize()); + logger.info("Size is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -286,7 +348,7 @@ public static void main(String[] args) { df.setOwner(ds); df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSize()); + logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); } catch (IOException e) { // TODO Auto-generated catch block @@ -294,4 +356,5 @@ public static void main(String[] args) { } } + } From bdba5d8ef8a459314d5b8dccab30190461bbfdea Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 19 Oct 2023 14:03:46 -0400 Subject: [PATCH 072/546] implement signedUrls for globus app, refactor --- .../edu/harvard/iq/dataverse/api/Admin.java | 8 +- .../harvard/iq/dataverse/api/Datasets.java | 75 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/Files.java | 6 +- .../externaltools/ExternalToolHandler.java | 71 +----------------- .../dataverse/globus/GlobusServiceBean.java | 58 +++++++------- .../iq/dataverse/util/URLTokenUtil.java | 65 ++++++++++++++++ src/main/java/propertyFiles/Bundle.properties | 4 +- .../ExternalToolHandlerTest.java | 11 +-- .../ExternalToolServiceBeanTest.java | 4 +- 9 files changed, 192 insertions(+), 110 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..1870c7cb508 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -96,7 +96,6 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; -import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; @@ -105,6 +104,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import java.io.IOException; @@ -2418,12 +2418,12 @@ public Response 
getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } String baseUrl = urlInfo.getString("url"); - int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10); - String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET"); + int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10); + String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET"); String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); - return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); + return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d3ea1b80696..aad5a95bd8e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -90,6 +90,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; @@ -3328,7 +3329,7 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" @POST @AuthRequired - @Path("{id}/addglobusFiles") + @Path("{id}/addGlobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @@ -3411,6 +3412,74 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } + /** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer. This api endpoint is expected to be called as a signed callback + * after the globus-dataverse app/other app is launched, but it will accept + * other forms of authentication. 
+ * + * @param crc + * @param datasetId + */ + @GET + @AuthRequired + @Path("{id}/globusUploadParameters") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @QueryParam(value = "locale") String locale) + { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + if(!DataAccess.getDriverType(storeId).equals(DataAccess.GLOBUS)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + boolean managed = GlobusOverlayAccessIO.isDataverseManaged(storeId); + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}").add("managed", managed))); + + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") + .add(URLTokenUtil.TIMEOUT, 300)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing") + .add(URLTokenUtil.HTTP_METHOD, "GET") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files") + .add(URLTokenUtil.TIMEOUT, 300)); + + + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + return ok(tokenUtil.createPostBody(tokenUtil.getParams(queryParams.build()), allowedApiCalls.build())); + } + /** Requests permissions for a given globus user to upload to the dataset * * @param crc @@ -3915,8 +3984,8 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, } - ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } catch (WrappedResponse wr) { return wr.getResponse(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 82811162d52..4c2fa8f68ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -48,6 +48,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -822,10 +824,10 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P return error(BAD_REQUEST, "FileMetadata not found."); } - ExternalToolHandler eth = null; + URLTokenUtil eth = null; eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index de4317464e6..36227c2f883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -22,12 +22,8 @@ import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.ws.rs.HttpMethod; @@ -41,15 +37,10 @@ */ public class ExternalToolHandler extends URLTokenUtil { - private final ExternalTool externalTool; + public final ExternalTool externalTool; private String requestMethod; - - public static final String HTTP_METHOD="httpMethod"; - public static final String TIMEOUT="timeOut"; - public static final String SIGNED_URL="signedUrl"; - public static final String NAME="name"; - public static final String URL_TEMPLATE="urlTemplate"; + /** @@ -134,10 +125,10 @@ public String handleRequest(boolean preview) { } else { // ToDo - if the allowedApiCalls() are defined, could/should we send them to - // tools using GET as well? + // tools using POST as well? if (requestMethod.equals(HttpMethod.POST)) { - String body = JsonUtil.prettyPrint(createPostBody(params).build()); + String body = JsonUtil.prettyPrint(createPostBody(params, null).build()); try { logger.info("POST Body: " + body); return postFormData(body); @@ -149,60 +140,6 @@ public String handleRequest(boolean preview) { return null; } - public JsonObject getParams(JsonObject toolParameters) { - //ToDo - why an array of object each with a single key/value pair instead of one object? 
- JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); - - // ToDo return json and print later - JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); - if (!(queryParams == null) && !queryParams.isEmpty()) { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - JsonValue param = getParam(value); - if (param != null) { - paramsBuilder.add(key, param); - } - }); - }); - } - return paramsBuilder.build(); - } - - public JsonObjectBuilder createPostBody(JsonObject params) { - JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); - bodyBuilder.add("queryParameters", params); - String apiCallStr = externalTool.getAllowedApiCalls(); - if (apiCallStr != null && !apiCallStr.isBlank()) { - JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); - JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - logger.fine(JsonUtil.prettyPrint(apiObj)); - String name = apiObj.getJsonString(NAME).getString(); - String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); - int timeout = apiObj.getInt(TIMEOUT); - String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); - logger.fine("URL Template: " + urlTemplate); - urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = apiPath; - // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) - ApiToken apiToken = getApiToken(); - if (apiToken != null) { - url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), - httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") - + getApiToken().getTokenString()); - } - logger.fine("Signed URL: " + url); - apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) - .add(SIGNED_URL, url).add(TIMEOUT, timeout)); - })); - bodyBuilder.add("signedUrls", apisBuilder); - } - return bodyBuilder; - } - private String postFormData(String allowedApis) throws IOException, InterruptedException { String url = null; HttpClient client = HttpClient.newHttpClient(); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8aa9915db58..2c0edd070f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -20,6 +20,7 @@ import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -45,6 +46,8 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.commons.codec.binary.StringUtils; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -58,6 +61,7 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import 
edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -120,7 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); - ArrayList ids = new ArrayList(); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); @@ -282,7 +285,7 @@ private String getUniqueFilePath(GlobusEndpoint endpoint) { //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) - .removalListener((ruleId, datasetId, cause) -> { + .evictionListener((ruleId, datasetId, cause) -> { //Delete rules that expire Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, null); @@ -575,12 +578,23 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - //Use URLTokenUtil for params currently in common with external tools. + // Use URLTokenUtil for params currently in common with external tools. URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); String appUrl; if (upload) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + String callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusUploadParameters?locale=" + localeCode; + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), + HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } else { + // Shouldn't happen + logger.warning("unable to get api token for user: " + user.getIdentifier()); + } + appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); } else { if (df == null) { appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") @@ -637,39 +651,27 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S globusLogger = logger; } - globusLogger.info("Starting an globusUpload "); + Thread.sleep(5000); - - // ToDo - use DataAccess methods? 
- //String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - //datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = null; - - Thread.sleep(5000); - // globus task status check GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); + globusLogger.info("Starting an globusUpload "); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - - ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - - if(ruleId!=null) { + String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + logger.info("Found rule: " + ruleId); + if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); - if(datasetId!=null) { - - //Will delete rule - rulesCache.invalidate(ruleId); - } else { - //The cache already expired this rule, in which case it's delay not long enough, or we have some other problem - logger.warning("Rule " + ruleId + " not found in rulesCache"); - deletePermission(ruleId, dataset, globusLogger); + if (datasetId != null) { + + // Will delete rule + rulesCache.invalidate(ruleId); } } @@ -836,6 +838,10 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } + if (ruleId != null) { + deletePermission(ruleId, dataset, globusLogger); + globusLogger.info("Removed upload permission: " + ruleId); + } } public String addFilesAsync(String curlCommand, Logger globusLogger) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index 4ae76a7b8db..216237105aa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -6,6 +6,10 @@ import java.util.regex.Pattern; import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import edu.harvard.iq.dataverse.DataFile; @@ -13,6 +17,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonUtil; public class URLTokenUtil { @@ -22,6 +28,13 @@ public class URLTokenUtil { protected final FileMetadata fileMetadata; protected ApiToken apiToken; protected String localeCode; + + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; /** * File level @@ -193,6 +206,58 @@ private String getTokenValue(String value) { throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + public JsonObjectBuilder createPostBody(JsonObject params, JsonArray allowedApiCalls) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + if (allowedApiCalls != null && !allowedApiCalls.isEmpty()) { + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + allowedApiCalls.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + 
logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of object each with a single key/value pair instead of one object? + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + public static String getScriptForUrl(String url) { String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3497b23eb94..88f819b417b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2645,8 +2645,8 @@ datasets.api.privateurl.anonymized.error.released=Can't create a URL for anonymi datasets.api.creationdate=Date Created datasets.api.modificationdate=Last Modified Date datasets.api.curationstatus=Curation Status -datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this installation of Dataverse. -datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this installation of Dataverse. +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this dataset. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this dataset. 
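For reference, the post body assembled by the getParams/createPostBody methods added to URLTokenUtil above (and delivered to the external tool or Globus app) has roughly the following shape. The dataset values, the subset of query parameters, and the signature portion of each signedUrl are illustrative placeholders only; the actual contents depend on the tool manifest and the allowedApiCalls array passed in:

    {
      "queryParameters": {
        "datasetId": 42,
        "datasetPid": "doi:10.5072/FK2/ABCDEF",
        "siteUrl": "https://demo.dataverse.org",
        "datasetVersion": ":draft",
        "dvLocale": "en"
      },
      "signedUrls": [
        {
          "name": "addGlobusFiles",
          "httpMethod": "POST",
          "signedUrl": "https://demo.dataverse.org/api/v1/datasets/42/addGlobusFiles?until=...&user=...&method=POST&token=...",
          "timeOut": 300
        }
      ]
    }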
diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 21bb6633204..6f0132e2bc9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; @@ -53,7 +54,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { Exception expectedException1 = null; String nullLocaleCode = null; try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -71,7 +72,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { DataFile dataFile = new DataFile(); dataFile.setId(42l); try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -92,7 +93,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { .build().toString()); Exception expectedException2 = null; try { - ExternalToolHandler externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException2 = ex; } @@ -225,10 +226,10 @@ public void testGetToolUrlWithAllowedApiCalls() { assertTrue(et != null); System.out.println("allowedApiCalls et created"); System.out.println(et.getAllowedApiCalls()); - ExternalToolHandler externalToolHandler = new ExternalToolHandler(et, ds, at, null); + URLTokenUtil externalToolHandler = new ExternalToolHandler(et, ds, at, null); System.out.println("allowedApiCalls eth created"); JsonObject jo = externalToolHandler - .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters()))).build(); + .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters())), JsonUtil.getJsonArray(et.getAllowedApiCalls())).build(); assertEquals(1, jo.getJsonObject("queryParameters").getInt("datasetId")); String signedUrl = jo.getJsonArray("signedUrls").getJsonObject(0).getString("signedUrl"); // The date and token will change each time but check for the constant parts of diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 9337949f605..4f5af8b97b0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -9,6 +9,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.util.ArrayList; import java.util.List; import jakarta.json.Json; @@ -49,7 +51,7 @@ public void testfindAll() { externalToolTypes.add(externalToolType); ExternalTool.Scope scope = ExternalTool.Scope.FILE; ExternalTool externalTool = new ExternalTool("displayName", "toolName", "description", externalToolTypes, scope, "http://foo.com", "{}", DataFileServiceBean.MIME_TYPE_TSV_ALT); - ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); + URLTokenUtil externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); From f056d6c051bf784ca4808e8757efa9afcaf7778c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 19 Oct 2023 15:10:14 -0400 Subject: [PATCH 073/546] minor incremental changes (#9635) --- .../search/SearchIncludeFragment.java | 30 +++++++++++++++---- .../dataverse/search/SearchServiceBean.java | 6 ++-- .../dataverse/search/SolrQueryResponse.java | 10 ++++++- src/main/webapp/search-include-fragment.xhtml | 24 +++++++++++++-- 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 47a5621c3d6..14274a09399 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -131,7 +131,8 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; - private boolean solrIsOverloaded = false; + private boolean solrIsTemporarilyUnavailable = false; + private boolean solrFacetsDisabled = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -361,6 +362,14 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); + } + // Solr "temporarily unavailable" is the condition triggered by + // receiving a 503 from the search engine, that is in turn a result + // of one of the Solr "circuit breakers" being triggered by excessive + // load. We treat this condition as distinct from "Solr is down", + // on the assumption that it is transitive. + if (solrQueryResponse.isSolrTemporarilyUnavailable()) { + setSolrTemporarilyUnavailable(true); } // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) 
@@ -386,7 +395,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - if (selectedTypesList.size() < 3) { + if (selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable()) { // If some types are NOT currently selected, we will need to // run another query to obtain the numbers of the unselected types: @@ -1079,14 +1088,23 @@ public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } - public boolean isSolrOverloaded() { - return solrIsOverloaded; + public boolean isSolrTemporarilyUnavailable() { + return solrIsTemporarilyUnavailable; } - public void setSolrIsOverloaded(boolean solrIsOverloaded) { - this.solrIsOverloaded = solrIsOverloaded; + public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) { + this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; } + public boolean isFacetsDisabled() { + return solrFacetsDisabled; + } + + public void setFacetsDisabled(boolean solrFacetsDisabled) { + this.solrFacetsDisabled = solrFacetsDisabled; + } + + public boolean isRootDv() { return rootDv; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 1b92c2a4a46..6e410488794 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -382,7 +382,6 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; - boolean solrTemporarilyUnavailable = false; try { queryResponse = solrClientService.getSolrClient().query(solrQuery); @@ -397,6 +396,8 @@ public SolrQueryResponse search( logger.info("return code: "+queryResponse.getStatus()); } + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); + // We probably shouldn't be assuming that this is necessarily a // "search syntax error", as the code below implies - could be // something else too - ? 
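For context on the 503 handling in the hunk that follows: when one of Solr's circuit breakers (memory, CPU load, etc.) rejects a query, the client receives an HTTP 503 whose JSON body is roughly shaped like the sketch below (the header fields and message text are illustrative and vary by Solr version). It is that 503 status, surfaced via ex.code(), that is translated into the "temporarily unavailable" state rather than "Solr is down":

    {
      "responseHeader": { "status": 503, "QTime": 2 },
      "error": {
        "msg": "Circuit Breakers tripped ...",
        "code": 503
      }
    }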
@@ -407,9 +408,9 @@ public SolrQueryResponse search( // a transient condition): if (ex.code() == 503) { - solrTemporarilyUnavailable = true; // actual logic for communicating this state back to the local // client code TBD (@todo) + exceptionSolrQueryResponse.setSolrTemporarilyUnavailable(true); } String error = "Search Syntax Error: "; @@ -421,7 +422,6 @@ public SolrQueryResponse search( error += messageFromSolr; } logger.info(error); - SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java index 893099ff08d..27e79cb1fc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java @@ -26,6 +26,7 @@ public class SolrQueryResponse { private String error; private Map dvObjectCounts = new HashMap<>(); private Map publicationStatusCounts = new HashMap<>(); + private boolean solrTemporarilyUnavailable = false; public static String DATAVERSES_COUNT_KEY = "dataverses_count"; public static String DATASETS_COUNT_KEY = "datasets_count"; @@ -91,7 +92,14 @@ public JsonObjectBuilder getPublicationStatusCountsAsJSON(){ } return this.getMapCountsAsJSON(publicationStatusCounts); } - + + public boolean isSolrTemporarilyUnavailable() { + return solrTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrTemporarilyUnavailable) { + this.solrTemporarilyUnavailable = solrTemporarilyUnavailable; + } public JsonObjectBuilder getDvObjectCountsAsJSON(){ diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 718df813348..8397a14136e 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -88,12 +88,24 @@
#{msg.rendered()} From 00a17071c358b7ebee09e77130cb7319c665dfb5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:36 -0500 Subject: [PATCH 151/546] Revert "allow longer custom questions" This reverts commit ba4d178f5c541ec88ea0879ec5c715bda529f2c9. --- src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java index d880da5b4a8..2cb6f27c3e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java +++ b/src/main/java/edu/harvard/iq/dataverse/CustomQuestion.java @@ -2,7 +2,7 @@ import java.io.Serializable; import java.util.List; import jakarta.persistence.*; -import jakarta.validation.constraints.NotBlank; +import org.hibernate.validator.constraints.NotBlank; /** * @@ -41,7 +41,7 @@ public void setId(Long id) { private String questionType; @NotBlank(message = "{custom.questiontext}") - @Column( nullable = false, columnDefinition = "TEXT") + @Column( nullable = false ) private String questionString; private boolean required; From d3fbee58262ac439a0b10f4ca7e1494dea4a6c5d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 10 Nov 2023 13:38:43 -0500 Subject: [PATCH 152/546] Revert "add return null if commandexception" This reverts commit aa7eceeb762eca045127cf91acb35d6c62b00d79. --- src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 8b09291d052..9fb584a9133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -320,7 +320,7 @@ public String save() { logger.info("Guestbook Page Command Exception. Dataverse: " + dataverse.getName()); logger.info(ex.toString()); FacesContext.getCurrentInstance().addMessage(null, new FacesMessage(FacesMessage.SEVERITY_FATAL, BundleUtil.getStringFromBundle("guestbook.save.fail"), " - " + ex.toString())); - return null; + //logger.severe(ex.getMessage()); } editMode = null; String msg = (create)? BundleUtil.getStringFromBundle("guestbook.create"): BundleUtil.getStringFromBundle("guestbook.save"); From 4b347c7ec13591ba38ffa55fbde394cce2b8bcfe Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:47:17 -0500 Subject: [PATCH 153/546] doc update --- .../source/developers/big-data-support.rst | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 1917967b3f3..d38f7f27a68 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,20 +149,30 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets when their Dataverse installation is configured to use a Globus accessible S3 store and a community-developed `dataverse-globus `_ "transfer" app has been properly installed and configured. 
+Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. -As Globus aficionados know, Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). +Globus endpoints can be in a variety of places, from data centers to personal computers. +This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). -Globus transfer uses a very efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation -Globus transfer requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution. Users will need a Globus account which could be obtained via their institution or directly from Globus (at no cost). +Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. +Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus +S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. +Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). -The setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. +For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. +Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. 
be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). + +All of Dataverse's Globus capabilities are now store-based (see the store documentation) and therefore different collections/datasets can be configured to use different Globus-capable stores (or normal file, S3 stores, etc.) + +More details of the setup required to enable Globus is described in the `Community Dataverse-Globus Setup and Configuration document `_ and the references therein. As described in that document, Globus transfers can be initiated by choosing the Globus option in the dataset upload panel. (Globus, which does asynchronous transfers, is not available during dataset creation.) Analogously, "Globus Transfer" is one of the download options in the "Access Dataset" menu and optionally the file landing page download menu (if/when supported in the dataverse-globus app). From 6ad55eb689071921857a9f97135e97dd2e71c076 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:50:16 -0500 Subject: [PATCH 154/546] Support multiple ref endpoints for non-managed case --- .../harvard/iq/dataverse/api/Datasets.java | 72 ++++---- .../dataaccess/GlobusAccessibleStore.java | 14 +- .../dataaccess/GlobusOverlayAccessIO.java | 166 +++++++++++++----- .../dataaccess/RemoteOverlayAccessIO.java | 47 +++-- .../dataverse/globus/GlobusServiceBean.java | 31 +++- 5 files changed, 226 insertions(+), 104 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b1c528f3fd9..a57f373f106 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3601,13 +3601,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); - if (managed) { - - allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusTransferPaths") + String requestCallName = managed ? 
"requestGlobusTransferPaths" : "requestGlobusReferencePaths"; + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName) .add(URLTokenUtil.HTTP_METHOD, "POST") - .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusTransferPaths") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusPaths") .add(URLTokenUtil.TIMEOUT, 300)); - } allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles") .add(URLTokenUtil.HTTP_METHOD, "POST") .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles") @@ -3632,7 +3630,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat */ @POST @AuthRequired - @Path("{id}/requestGlobusTransferPaths") + @Path("{id}/requestGlobusPaths") @Consumes(MediaType.APPLICATION_JSON) @Produces(MediaType.APPLICATION_JSON) public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, String jsonBody @@ -3666,35 +3664,45 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } catch (WrappedResponse wr) { return wr.getResponse(); } - - if(!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { - return badRequest("This dataset does not have managed Globus storage"); - } - if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset) .canIssue(UpdateDatasetVersionCommand.class)) { - try { + JsonObject params = JsonUtil.getJsonObject(jsonBody); - String principal = params.getString("principal"); - int numberOfPaths = params.getInt("numberOfFiles"); - if(numberOfPaths <=0) { - return badRequest("numberOfFiles must be positive"); - } - - JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); - switch (response.getInt("status")) { - case 201: - return ok(response.getJsonObject("paths")); - case 400: - return badRequest("Unable to grant permission"); - case 409: - return conflict("Permission already exists"); - default: - return error(null, "Unexpected error when granting permission"); - } - } catch (NullPointerException|ClassCastException e) { - return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); - + if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + try { + JsonArray referencedFiles = params.getJsonArray("referencedFiles"); + if (referencedFiles == null || referencedFiles.size() == 0) { + return badRequest("No referencedFiles specified"); + } + JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles); + return (ok(fileMap)); + } catch (Exception e) { + return badRequest(e.getLocalizedMessage()); + } + } else { + try { + String principal = params.getString("principal"); + int numberOfPaths = params.getInt("numberOfFiles"); + if (numberOfPaths <= 0) { + return badRequest("numberOfFiles must be positive"); + } + + JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths); + switch (response.getInt("status")) { + case 201: + return ok(response.getJsonObject("paths")); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } catch (NullPointerException | ClassCastException e) { + return badRequest("Error retrieving principal and numberOfFiles from JSON request body"); + + } } } else { return 
forbidden("User doesn't have permission to upload to this dataset"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index 1d98044b2b5..afc7556481a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -7,8 +7,7 @@ public interface GlobusAccessibleStore { static final String MANAGED = "managed"; - static final String GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH = "globus-transfer-endpoint-with-basepath"; - static final String GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS = "globus-reference-endpoints-with-basepaths"; + static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; public static boolean isDataverseManaged(String driverId) { @@ -16,37 +15,36 @@ public static boolean isDataverseManaged(String driverId) { } public static String getTransferEndpointId(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; } public static String getTransferPath(String driverId) { - String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, GLOBUS_TRANSFER_ENDPOINT_WITH_BASEPATH); + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); int pathStart = endpointWithBasePath.indexOf("/"); return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : ""; } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, GLOBUS_REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i/// * - * baseUrl: globus:// + * transfer and reference endpoint formats: + * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends RemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -50,7 +53,7 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce * Dataverse/the globus app manage file locations, access controls, deletion, * etc. 
*/ - private boolean dataverseManaged = false; + private Boolean dataverseManaged = null; private String relativeDirectoryPath; @@ -58,22 +61,59 @@ public class GlobusOverlayAccessIO extends RemoteOverlayAcce private String filename; + private String[] allowedEndpoints; private String endpoint; public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); - dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); } + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + this.driverId = driverId; + configureStores(null, driverId, storageLocation); + if (isManaged()) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Referenced path: " + path); + } + } + private boolean isManaged() { + if(dataverseManaged==null) { + dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); + } + return dataverseManaged; + } + + private String retrieveGlobusAccessToken() { + String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN); + + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); + } + + private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; - String endpointWithBasePath = baseUrl.substring(baseUrl.lastIndexOf(DataAccess.SEPARATOR) + 3); + String endpointWithBasePath = null; + if (!isManaged()) { + endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints); + } else { + endpointWithBasePath = allowedEndpoints[0]; + } + //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); logger.info("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); logger.info("endpointPath: " + endpointPath); + - if (dataverseManaged && (dvObject!=null)) { + if (isManaged() && (dvObject!=null)) { Dataset ds = null; if (dvObject instanceof Dataset) { @@ -95,40 +135,36 @@ private void parsePath() { } - public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { - this.driverId = driverId; - configureStores(null, driverId, storageLocation); - this.dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); - if (dataverseManaged) { - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); - path = parts[1]; - } else { - this.setIsLocalFile(false); - path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(path); - logger.fine("Relative path: " + path); + private static String findMatchingEndpoint(String path, String[] allowedEndpoints) { + for(int i=0;i 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index aafab038ae2..5463254140d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -65,7 +65,10 @@ public class RemoteOverlayAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; + // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. + static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; @@ -74,7 +77,7 @@ public class RemoteOverlayAccessIO extends StorageIO { protected StorageIO baseStore = null; protected String path = null; - protected String baseUrl = null; + private String baseUrl = null; protected static HttpClientContext localContext = HttpClientContext.create(); protected PoolingHttpClientConnectionManager cm = null; @@ -110,7 +113,7 @@ public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Relative path: " + path); } - private void validatePath(String relPath) throws IOException { + protected void validatePath(String relPath) throws IOException { try { URI absoluteURI = new URI(baseUrl + "/" + relPath); if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { @@ -457,19 +460,8 @@ int getUrlExpirationMinutes() { } protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = getConfigParam(BASE_URL); - if (baseUrl == null) { - throw new IOException("dataverse.files." 
+ this.driverId + ".base-url is required"); - } else { - try { - new URI(baseUrl); - } catch (Exception e) { - logger.warning( - "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); - throw new IOException("Can't interpret base-url as a URI"); - } - - } + configureEndpoints(); + if (baseStore == null) { String baseDriverId = getBaseStoreIdFor(driverId); @@ -543,6 +535,31 @@ protected void configureStores(DataAccessRequest req, String driverId, String st } } + /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but + * the derived GlobusOverlayAccessIO can support multiple endpoints. + * @throws IOException + */ + protected void configureEndpoints() throws IOException { + baseUrl = getConfigParam(BASE_URL); + if (baseUrl == null) { + //Will accept the first endpoint using the newer setting + baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0]; + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } + } + if (baseUrl != null) { + try { + new URI(baseUrl); + } catch (Exception e) { + logger.warning( + "Trouble interpreting base-url for store: " + this.driverId + " : " + e.getLocalizedMessage()); + throw new IOException("Can't interpret base-url as a URI"); + } + + } + } + // Convenience method to assemble the path, starting with the DOI // authority/identifier/, that is needed to create a base store via // DataAccess.getDirectStorageIO - the caller has to add the store type specific diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index dab0e36852c..3dee3bd498f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -6,7 +6,6 @@ import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; - import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -20,6 +19,8 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonString; +import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; @@ -57,7 +58,6 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -284,6 +284,33 @@ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int return response.build(); } + public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); + + JsonObjectBuilder fileMap = Json.createObjectBuilder(); + referencedFiles.forEach(value -> { + if (value.getValueType() != ValueType.STRING) { + throw new JsonParsingException("ReferencedFiles must be strings", 
null); + } + String referencedFile = ((JsonString) value).getString(); + boolean valid = false; + for (int i = 0; i < endpoints.size(); i++) { + if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) { + valid = true; + } + } + if (!valid) { + throw new IllegalArgumentException( + "Referenced file " + referencedFile + " is not in an allowed endpoint/path"); + } + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + fileMap.add(referencedFile, + storageIdentifier + "//" + referencedFile); + }); + return fileMap.build(); + } + //Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From 48f02dde7f22b21e28c8d635df904b79532f042a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 10 Nov 2023 17:56:56 -0500 Subject: [PATCH 155/546] handle file not found case --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 0dec7133fb5..f42f5443108 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -25,6 +25,7 @@ import org.apache.http.util.EntityUtils; import jakarta.json.Json; +import jakarta.json.JsonArray; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; @@ -189,7 +190,11 @@ public long retrieveSizeFromMedia() { String responseString = EntityUtils.toString(response.getEntity()); logger.info("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); - return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + JsonArray dataArray = responseJson.getJsonArray("DATA"); + if (dataArray != null && dataArray.size() != 0) { + //File found + return (long) responseJson.getJsonArray("DATA").getJsonObject(0).getInt("size"); + } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); From c33f07aad938f4707e6985ddeeec801969e4a3fc Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Sat, 11 Nov 2023 14:38:00 -0500 Subject: [PATCH 156/546] Add logic to leave settings as found before test --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 3bfa3d72fbd..6b9b59f431d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -30,7 +30,12 @@ public static void setUpClass() { @Test public void testFreeformDraftActions() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -85,15 +90,20 @@ public void 
testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + } @Test public void testAddProvFile() { - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + boolean provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -213,12 +223,8 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset - UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); -// Command removed, redundant -// Response deleteProvFreeForm = UtilIT.deleteProvFreeForm(dataFileId.toString(), apiTokenForDepositor); -// deleteProvFreeForm.prettyPrint(); -// deleteProvFreeForm.then().assertThat() -// .statusCode(OK.getStatusCode()); - + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } } From 6beafcef4855c2a35cfe6d61408a5625a285885e Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Sat, 11 Nov 2023 22:09:22 -0500 Subject: [PATCH 157/546] Change format to MD of the QA guide --- doc/sphinx-guides/source/index.rst | 2 +- doc/sphinx-guides/source/qa/conclusion.md | 11 -------- doc/sphinx-guides/source/qa/index.md | 10 +++++++ doc/sphinx-guides/source/qa/index.rst | 14 ---------- .../{manual-testing.rst => manual-testing.md} | 27 +++++++++---------- ...her-approaches.rst => other-approaches.md} | 24 ++++++++--------- .../source/qa/{overview.rst => overview.md} | 23 ++++++++-------- ...ormance-tests.rst => performance-tests.md} | 21 ++++++++------- ...ion.rst => test-automation-integration.md} | 24 ++++++++--------- ...tructure.rst => testing-infrastructure.md} | 15 +++++------ 10 files changed, 77 insertions(+), 94 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/conclusion.md create mode 100644 doc/sphinx-guides/source/qa/index.md delete mode 100755 doc/sphinx-guides/source/qa/index.rst rename doc/sphinx-guides/source/qa/{manual-testing.rst => manual-testing.md} (92%) rename doc/sphinx-guides/source/qa/{other-approaches.rst => other-approaches.md} (95%) rename doc/sphinx-guides/source/qa/{overview.rst => overview.md} (95%) rename doc/sphinx-guides/source/qa/{performance-tests.rst => performance-tests.md} (91%) rename doc/sphinx-guides/source/qa/{test-automation-integration.rst => test-automation-integration.md} (78%) rename doc/sphinx-guides/source/qa/{testing-infrastructure.rst => testing-infrastructure.md} (82%) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index 9d3d49ef4f2..3184160b387 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -20,7 +20,7 @@ These documentation guides are for the |version| version of Dataverse. 
To find g developers/index container/index style/index - qa/index + qa/index.md How the Guides Are Organized ---------------------------- diff --git a/doc/sphinx-guides/source/qa/conclusion.md b/doc/sphinx-guides/source/qa/conclusion.md deleted file mode 100644 index 233dc3cdf3d..00000000000 --- a/doc/sphinx-guides/source/qa/conclusion.md +++ /dev/null @@ -1,11 +0,0 @@ -Conclusion -========== - -QA is awesome. Do you know what else is awesome? Markdown. - -It's easy to create a [link](https://dataverse.org), for example, and nested bullets don't need extra indentation: - -- foo - - one - - two -- bar diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md new file mode 100644 index 00000000000..c190d823bef --- /dev/null +++ b/doc/sphinx-guides/source/qa/index.md @@ -0,0 +1,10 @@ +# QA Guide + +```{toctree} +overview.md +testing-infrastructure.md +performance-tests.md +manual-testing.md +test-automation-integration.md +other-approaches.md +``` \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/index.rst b/doc/sphinx-guides/source/qa/index.rst deleted file mode 100755 index dd8c046fddc..00000000000 --- a/doc/sphinx-guides/source/qa/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -QA Guide -======== - -**Contents:** - -.. toctree:: - - overview - testing-infrastructure - performance-tests - manual-testing - test-automation-integration - other-approaches - conclusion diff --git a/doc/sphinx-guides/source/qa/manual-testing.rst b/doc/sphinx-guides/source/qa/manual-testing.md similarity index 92% rename from doc/sphinx-guides/source/qa/manual-testing.rst rename to doc/sphinx-guides/source/qa/manual-testing.md index 8e50e6b6b08..bf6f16f7911 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.rst +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -1,23 +1,22 @@ -Manual Testing Approach -======================= +# Manual Testing Approach -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Introduction -Introduction ------------- We use a risk-based, manual testing approach to achieve the most benefit with limited resources. This means we want to catch bugs where they are likely to exist, ensure core functions work, and failures do not have catastrophic results. In practice this means we do a brief positive check of core functions on each build called a smoke test, we test the most likely place for new bugs to exist, the area where things have changed, and attempt to prevent catastrophic failure by asking about the scope and reach of the code and how failures may occur. If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. -Examining a Pull Pequest for Test Cases: ----------------------------------------- -What Problem Does it Solve? -++++++++++++++++++++++++++++++++++++++++++++ +## Examining a Pull Pequest for Test Cases: + +### What Problem Does it Solve? + Read the top part of the pull request for a description, notes for reviewers, and usually a how-to test section. Does it make sense? If not, read the underlying ticket it closes, and any release notes or documentation. 
Knowing in general what it does helps you to think about how to approach it. -How is it Configured? -+++++++++++++++++++++ +### How is it Configured? + Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. An admin will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under how to test. Those may be sufficient, but you should always think about it based on what it does. @@ -32,8 +31,8 @@ Check permissions. Is this feature limited to a specific set of users? Can it be Think about risk. Is the feature or function part of a critical area such as permissions? Does the functionality modify data? You may do more testing when the risk is higher. -Smoke Test ------------ +## Smoke Test + 1. Go to the homepage on https://dataverse-internal.iq.harvard.edu. Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. 2. Create a new user: I use a formulaic name with my initials and date and make the username and password the same, eg. kc080622. diff --git a/doc/sphinx-guides/source/qa/other-approaches.rst b/doc/sphinx-guides/source/qa/other-approaches.md similarity index 95% rename from doc/sphinx-guides/source/qa/other-approaches.rst rename to doc/sphinx-guides/source/qa/other-approaches.md index bd92e7d22d8..b50d9d0cf11 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.rst +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,13 +1,13 @@ -Other approaches to deploying and testing -========================================= +# Other approaches to deploying and testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. I’m assuming if a developer is testing, they would likely just deploy to their dev environment. That might be ok but not sure the env is fully configured enough to offer a real-world testing scenario. An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. -Tips and tricks ---------------- +## Tips and tricks + - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. 
Save the doc in a testing folder (I have one on Google Drive). This potentially will help with future testing. - When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. @@ -17,8 +17,8 @@ Tips and tricks - When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. - Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. -Workflow for Completing QA on a PR ------------------------------------ +## Workflow for Completing QA on a PR + 1. Assign the PR you are working on to yourself. @@ -106,8 +106,8 @@ Workflow for Completing QA on a PR Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. -Checklist for Completing QA on a PR ------------------------------------- +## Checklist for Completing QA on a PR + 1. Build the docs 2. Smoke test the pr @@ -115,8 +115,8 @@ Checklist for Completing QA on a PR 4. Regression test 5. Test any upgrade instructions -Checklist for QA on Release ---------------------------- +## Checklist for QA on Release + 1. Review Consolidated Release Notes, in particular upgrade instructions. 2. Conduct performance testing and compare with the previous release. diff --git a/doc/sphinx-guides/source/qa/overview.rst b/doc/sphinx-guides/source/qa/overview.md similarity index 95% rename from doc/sphinx-guides/source/qa/overview.rst rename to doc/sphinx-guides/source/qa/overview.md index 153fab1a28f..51b38ee0921 100644 --- a/doc/sphinx-guides/source/qa/overview.rst +++ b/doc/sphinx-guides/source/qa/overview.md @@ -1,26 +1,25 @@ -Overview -======== +# Overview -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Introduction -Introduction ------------- This document describes the testing process used by QA at IQSS and provides a guide for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. -Workflow --------- +## Workflow + The basic workflow is bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub Kanban board. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common develop branch and ultimately released as part of the product. Before a pull request is merged it must be reviewed by a member of the development team from a coding perspective, it must pass automated integration tests before moving to QA. There it is tested manually, exercising the UI using three common browser types and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions are tested. Once this passes and any bugs that are found are corrected, the automated integration tests are confirmed to be passing, the PR is merged into development, the PR is closed, and the branch is deleted. At this point, the pr moves from the QA column automatically into the Done column and the process repeats with the next pr until it is decided to make a release. -Release Cadence and Sprints ---------------------------- +## Release Cadence and Sprints + A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. -Performance Testing and Deployment ----------------------------------- +## Performance Testing and Deployment + The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named Locust, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds (I believe), it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. Once the performance has been tested and recorded in a Google spreadsheet for this proposed version, the release will be prepared and posted. diff --git a/doc/sphinx-guides/source/qa/performance-tests.rst b/doc/sphinx-guides/source/qa/performance-tests.md similarity index 91% rename from doc/sphinx-guides/source/qa/performance-tests.rst rename to doc/sphinx-guides/source/qa/performance-tests.md index 1bfde798100..7075d7f1776 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.rst +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -1,21 +1,22 @@ -Performance Testing -=================== +# Performance Testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` + +## Introduction -Introduction ------------- To run performance tests, we have a performance test cluster on AWS that employs web, database, and Solr. The database contains a copy of production that is updated weekly on Sundays. 
To ensure the homepage content is consistent between test runs across releases, two scripts set the datasets that will appear on the homepage. There is a script on the web server in the default CentOS user dir and one on the database server in the default CentOS user dir. Run these scripts before conducting the tests. -Access ------- +## Access + Access to performance cluster instances requires ssh keys, see Leonid. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. -Special Notes âš ï¸ ------------------ +## Special Notes âš ï¸ + Please note the performance database is also used occasionally by Julian and the Curation team to generate prod reports so a courtesy check with Julian would be good before taking over the env. + Executing the Performance Script -------------------------------- To execute the performance test script, you need to install a local copy of the database-helper-scripts project (https://github.com/IQSS/dataverse-helper-scripts), written by Raman. I have since produced a stripped-down script that calls just the DB and ds and works with python3. diff --git a/doc/sphinx-guides/source/qa/test-automation-integration.rst b/doc/sphinx-guides/source/qa/test-automation-integration.md similarity index 78% rename from doc/sphinx-guides/source/qa/test-automation-integration.rst rename to doc/sphinx-guides/source/qa/test-automation-integration.md index 13c48105f91..5e9d00cd461 100644 --- a/doc/sphinx-guides/source/qa/test-automation-integration.rst +++ b/doc/sphinx-guides/source/qa/test-automation-integration.md @@ -1,15 +1,15 @@ -Test automation and integration test -==================================== +# Test automation and integration test -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` This test suite is added to and maintained by development. It is generally advisable for code contributors to add integration tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base’s code paths as possible, every time to catch bugs. This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. -Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: -------------------------------------------------------------------------- +## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: + 1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. @@ -17,12 +17,12 @@ Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: 3. Log on to jenkins.dataverse.org, select the IQSS_Dataverse_Internal project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: - - 8372-gdcc-xoai-library has IQSS implied - | **Repository URL:** https://github.com/IQSS/dataverse.git - | **Branch specifier:** \*/8372-gdcc-xoai-library - - GlobalDataverseCommunityConsortium:GDCC/DC-3B - | **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - | **Branch specifier:** \*/GDCC/DC-3B. 
+ * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. 4. Click Build Now and note the build number in progress. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.rst b/doc/sphinx-guides/source/qa/testing-infrastructure.md similarity index 82% rename from doc/sphinx-guides/source/qa/testing-infrastructure.rst rename to doc/sphinx-guides/source/qa/testing-infrastructure.md index d35bc6e9a23..fb66bc4d099 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.rst +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -1,16 +1,15 @@ -Infrastructure for Testing -========================== +# Infrastructure for Testing -.. contents:: |toctitle| - :local: +```{contents} +:depth: 3 +``` +## Dataverse Internal -Dataverse Internal -------------------- To build and test a PR, we use a build named IQSS_Dataverse_Internal on jenkins.dataverse.org, which deploys the .war file to an AWS instance named dataverse-internal.iq.harvard.edu. Login to Jenkins requires a username and password. Check with Don Sizemore. Login to the dataverse-internal server requires a key, see Leonid. -Guides Server -------------- +## Guides Server + There is also a guides build project named guides.dataverse.org. Any test builds of guides are deployed to a named directory** on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. Login to the guides server requires a key, see Don Sizemore. 
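
The load profile that the QA guide above describes for performance testing (roughly half homepage requests and half dataset-page requests, issued as a guest with 5–20 seconds of think time, ramping from 1 toward 250 users) can be sketched as a small Locust file. The snippet below is only an illustration of that profile, not the script the QA team actually runs; the persistent identifiers and host name are placeholders, and the real datasets used are the ~10 production datasets with varying file counts mentioned in the guide.

```python
# Illustrative sketch of the QA performance-test profile described above:
# ~50% homepage hits, ~50% dataset-page hits, 5-20 s of think time, guest user.
# The persistent IDs below are placeholders, not real datasets.
import random
from locust import HttpUser, task, between

# Hypothetical stand-ins for the ~10 datasets of varying file counts.
DATASET_PIDS = [
    "doi:10.5072/FK2/EXAMPLE1",
    "doi:10.5072/FK2/EXAMPLE2",
]

class GuestUser(HttpUser):
    # Simulated think time between requests, per the guide's description.
    wait_time = between(5, 20)

    @task
    def homepage(self):
        self.client.get("/")

    @task
    def dataset_page(self):
        # Equal task weights give the roughly 50/50 homepage/dataset split.
        pid = random.choice(DATASET_PIDS)
        self.client.get(f"/dataset.xhtml?persistentId={pid}", name="/dataset.xhtml")
```

It could be run against the performance cluster's web host with something like `locust -f perf_profile.py --host https://<perf-web-host>`, increasing the simulated user count in steps as the guide describes; results would then be compared against the numbers recorded for the previous release rather than against production directly.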
From 3407fb9f813984c857ef7708af7d6dc239b8f8ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 07:04:15 -0500 Subject: [PATCH 158/546] Add ProvIT to integration-tests.txt --- tests/integration-tests.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 18911b3164a..bb3bc7f9ce6 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,ProvIT From 2842cdaf246c531b04449ac4c8b20fc4a09c2668 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 Nov 2023 08:42:31 -0500 Subject: [PATCH 159/546] Move this change into BeforeAll/AfterAll --- .../edu/harvard/iq/dataverse/api/ProvIT.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 6b9b59f431d..69a87869fe1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -11,6 +11,9 @@ import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.AfterAll; + import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; @@ -20,22 +23,24 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; public class ProvIT { + + private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + + provEnabled = provCollectionStatus.getStatusCode() == 200; + if(!provEnabled){ + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } @Test public void testFreeformDraftActions() { - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -90,20 +95,11 @@ public void testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - if(!provEnabled){ - 
UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } - + } @Test - public void testAddProvFile() { - - Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - boolean provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ - UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - } + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); @@ -223,6 +219,11 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset + + } + + @AfterAll + public static void tearDownClass() { if(!provEnabled){ UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } From 437e7ccd480dbae405238faffb9fff8a8317218d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 13 Nov 2023 09:56:16 -0500 Subject: [PATCH 160/546] #9464 remove unused import --- src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index fabb33e328a..557b7df202b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -128,7 +128,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; From d029cacc9aae5e361869b73f7e76661c5ab8d549 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:35:28 -0500 Subject: [PATCH 161/546] remove extra whitespace #10112 --- src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 69a87869fe1..a944c6aa926 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -27,12 +27,12 @@ public class ProvIT { private static boolean provEnabled = false; @BeforeAll - public static void setUpClass() { + public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); - + provEnabled = provCollectionStatus.getStatusCode() == 200; - if(!provEnabled){ + if (!provEnabled) { UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); } } @@ -99,7 +99,7 @@ public void testFreeformDraftActions() { } @Test - public void testAddProvFile() { + public void testAddProvFile() { Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); From c09034d638147c5cd618e5ff4a460e1840b8cd0a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 11:37:16 -0500 Subject: [PATCH 162/546] organize imports #10112 --- .../java/edu/harvard/iq/dataverse/api/ProvIT.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index a944c6aa926..33323ff4239 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -1,27 +1,23 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.junit.jupiter.api.AfterAll; - +import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; - +import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - public class ProvIT { private static boolean provEnabled = false; From a3d323599be4bcc6ad688a8b99135bd4447fbb02 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 13 Nov 2023 16:07:53 -0500 Subject: [PATCH 163/546] various improvements to the QA Guide #10101 --- doc/sphinx-guides/source/developers/intro.rst | 2 + .../source/developers/testing.rst | 4 + .../source/developers/version-control.rst | 2 + doc/sphinx-guides/source/qa/index.md | 4 +- doc/sphinx-guides/source/qa/manual-testing.md | 31 +++---- .../source/qa/other-approaches.md | 91 +++++++++---------- doc/sphinx-guides/source/qa/overview.md | 15 ++- .../source/qa/performance-tests.md | 6 +- .../source/qa/test-automation-integration.md | 35 ------- .../source/qa/test-automation.md | 35 +++++++ .../source/qa/testing-infrastructure.md | 12 ++- 11 files changed, 119 insertions(+), 118 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/test-automation-integration.md create mode 100644 doc/sphinx-guides/source/qa/test-automation.md diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index a01a8066897..3eddfbe8d2d 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -37,6 +37,8 @@ Roadmap For the Dataverse Software development roadmap, please see https://www.iq.harvard.edu/roadmap-dataverse-project +.. _kanban-board: + Kanban Board ------------ diff --git a/doc/sphinx-guides/source/developers/testing.rst b/doc/sphinx-guides/source/developers/testing.rst index abecaa09fad..57733f25406 100755 --- a/doc/sphinx-guides/source/developers/testing.rst +++ b/doc/sphinx-guides/source/developers/testing.rst @@ -426,6 +426,10 @@ target/coverage-it/index.html is the place to start reading the code coverage re Load/Performance Testing ------------------------ +See also :doc:`/qa/performance-tests` in the QA Guide. + +.. _locust: + Locust ~~~~~~ diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 31fc0a4e602..f46411ebd7f 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -34,6 +34,8 @@ The "master" Branch The "`master `_" branch represents released versions of the Dataverse Software. 
As mentioned in the :doc:`making-releases` section, at release time we update the master branch to include all the code for that release. Commits are never made directly to master. Rather, master is updated only when we merge code into it from the "develop" branch. +.. _develop-branch: + The "develop" Branch ******************** diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index c190d823bef..08deb7ee27d 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -5,6 +5,6 @@ overview.md testing-infrastructure.md performance-tests.md manual-testing.md -test-automation-integration.md +test-automation.md other-approaches.md -``` \ No newline at end of file +``` diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/manual-testing.md index bf6f16f7911..9f365aae59f 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -9,23 +9,23 @@ We use a risk-based, manual testing approach to achieve the most benefit with li If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. -## Examining a Pull Pequest for Test Cases: +## Examining a Pull Request for Test Cases -### What Problem Does it Solve? +### What Problem Does It Solve? -Read the top part of the pull request for a description, notes for reviewers, and usually a how-to test section. Does it make sense? If not, read the underlying ticket it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. +Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. -### How is it Configured? +### How is It Configured? -Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. An admin will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! +Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! -Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under how to test. 
Those may be sufficient, but you should always think about it based on what it does. +Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature but a good basic approach to keep in mind. -Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log +Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log. Err on the side of providing the developer too much of server.log rather than too little. -Exercise the UI if there is one. I tend to use Chrome for most of my basic testing as it’s used twice as much as the next most commonly used browser, according to our site’s Google Analytics. I first go through all the options in the UI. Then, if all works, I’ll spot-check using Firefox and Safari. +Exercise the UI if there is one. We tend to use Chrome for most of my basic testing as it's used twice as much as the next most commonly used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. Check permissions. Is this feature limited to a specific set of users? Can it be accessed by a guest or by a non-privileged user? How about pasting a privileged page URL into a non-privileged user’s browser? @@ -33,11 +33,10 @@ Think about risk. Is the feature or function part of a critical area such as per ## Smoke Test - -1. Go to the homepage on https://dataverse-internal.iq.harvard.edu. Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. -2. Create a new user: I use a formulaic name with my initials and date and make the username and password the same, eg. kc080622. -3. Create a dataverse: I use the same username -4. Create a dataset: I use the same username; I fill in the required fields (I do not use a template). -5. Upload 3 different types of files: I use a tabular file, 50by1000.dta, an image file, and a text file. -6. Publish the dataset. -7. Download a file. +1. Go to the homepage on . Scroll to the bottom to ensure the build number is the one you intend to test from Jenkins. +1. Create a new user: It's fine to use a formulaic name with your initials and date and make the username and password the same, eg. kc080622. +1. Create a dataverse: You can use the same username. +1. Create a dataset: You can use the same username; fill in the required fields (do not use a template). +1. 
Upload 3 different types of files: You can use a tabular file, 50by1000.dta, an image file, and a text file. +1. Publish the dataset. +1. Download a file. diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/other-approaches.md index b50d9d0cf11..cf679c3f442 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,125 +1,120 @@ -# Other approaches to deploying and testing +# Other Approaches to Deploying and Testing ```{contents} :depth: 3 ``` -This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. I’m assuming if a developer is testing, they would likely just deploy to their dev environment. That might be ok but not sure the env is fully configured enough to offer a real-world testing scenario. An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. +This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. -## Tips and tricks +## Tips and Tricks - -- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (I have one on Google Drive). This potentially will help with future testing. -- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and tail -F server.log. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. 
-- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. -- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. +- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. +- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. +- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. +- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. +- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. ## Workflow for Completing QA on a PR +1. Assign the PR you are working on to yourself. -1. Assign the PR you are working on to yourself. - -2. What does it do? +1. What does it do? Read the description at the top of the PR, any release notes, documentation, and the original issue. -3. Does it address the issue it closes? +1. Does it address the issue it closes? The PR should address the issue entirely unless otherwise noted. -4. How do you test it? +1. How do you test it? - Look at the “how to test section†at the top of the pull request. Does it make sense? This likely won’t be the only testing you perform. You can develop further tests from the original issue or problem description, from the description of functionality, the documentation, configuration, and release notes. Also consider trying to reveal bugs by trying to break it: try bad or missing data, very large values or volume of data, exceed any place that may have a limit or boundary. + Look at the “how to test" section at the top of the pull request. Does it make sense? This likely won’t be the only testing you perform. You can develop further tests from the original issue or problem description, from the description of functionality, the documentation, configuration, and release notes. 
Also consider trying to reveal bugs by trying to break it: try bad or missing data, very large values or volume of data, exceed any place that may have a limit or boundary. -5. Does it have or need documentation? +1. Does it have or need documentation? - Small changes or fixes usually don’t have doc but new features or extensions of a feature or new configuration options should have documentation. + Small changes or fixes usually don’t have docs but new features or extensions of a feature or new configuration options should have documentation. -6. Does it have or need release notes? +1. Does it have or need release notes? Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. -7. Does it use a DB, flyway script? +1. Does it use a DB, Flyway script? Good to know since it may collide with another existing one by version or it could be a one way transform of your DB so back up your test DB before. Also, happens during deployment so be on the lookout for any issues. -8. Validate the documentation. +1. Validate the documentation. Build the doc using Jenkins, does it build without errors? Read it through for sense. Use it for test cases and to understand the feature. -9. Build and deploy the pull request. +1. Build and deploy the pull request. Normally this is done using Jenkins and automatically deployed to the QA test machine. -10. Configure if required +1. Configure if required If needed to operate and everyone installing or upgrading will use this, configure now as all testing will use it. -11. Smoke test the branch. +1. Smoke test the branch. Standard, minimal test of core functionality. -12. Regression test-related or potentially affected features +1. Regression test-related or potentially affected features If config is optional and testing without config turned on, do some spot checks/ regression tests of related or potentially affected areas. -13. Configure if optional +1. Configure if optional What is the default, enabled or disabled? Is that clearly indicated? Test both. By config here we mean enabling the functionality versus choosing a particular config option. Some complex features have config options in addition to enabling. Those will also need to be tested. -14. Test all the new or changed functionality. +1. Test all the new or changed functionality. The heart of the PR, what is this PR adding or fixing? Is it all there and working? -15. Regression test related or potentially affected features. +1. Regression test related or potentially affected features. - Sometimes new stuff modifies and extends other functionality or functionality that is shared with other aspects of the system, e.g. Export, Import. Check the underlying functionality that was also modified but in a spot check or briefer manner. + Sometimes new stuff modifies and extends other functionality or functionality that is shared with other aspects of the system, e.g. export, import. Check the underlying functionality that was also modified but in a spot check or briefer manner. -16. Report any issues found within the PR +1. Report any issues found within the PR It can be easy to lose track of what you’ve found, steps to reproduce, and any errors or stack traces from the server log. Add these in a numbered list to a comment in the pr. Easier to check off when fixed and to work on. Add large amounts of text as in the server log as attached, meaningfully named files. -17. Retest all fixes, spot check feature functionality, smoke test +1. 
Retest all fixes, spot check feature functionality, smoke test Similar to your initial testing, it is only narrower. -18. Test Upgrade Instructions, if required +1. Test upgrade instructions, if required Some features build upon the existing architecture but require modifications, such as adding a new column to the DB or changing or adding data. It is crucial that this works properly for our 100+ installations. This testing should be performed at the least on the prior version with basic data objects (collection, dataset, files) and any other data that will be updated by this feature. Using the sample data from the prior version would be good or deploying to dataverse-internal and upgrading there would be a good test. Remember to back up your DB before doing a transformative upgrade so that you can repeat it later if you find a bug. -19. Make sure the integration tests in the PR have been completed and passed. - +1. Make sure the API tests in the PR have been completed and passed. + They are run with each commit to the PR and take approximately 42 minutes to run. -20. Merge PR +1. Merge PR Click merge to include this PR into the common develop branch. -21. Delete merged branch +1. Delete merged branch Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. ## Checklist for Completing QA on a PR - 1. Build the docs -2. Smoke test the pr -3. Test the new functionality -4. Regression test -5. Test any upgrade instructions +1. Smoke test the pr +1. Test the new functionality +1. Regression test +1. Test any upgrade instructions ## Checklist for QA on Release - -1. Review Consolidated Release Notes, in particular upgrade instructions. -2. Conduct performance testing and compare with the previous release. -3. Perform clean install and smoke test. -4. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking - +1. Review Consolidated Release Notes, in particular upgrade instructions. +1. Conduct performance testing and compare with the previous release. +1. Perform clean install and smoke test. +1. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 51b38ee0921..d3364fbbbf9 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -6,11 +6,11 @@ ## Introduction -This document describes the testing process used by QA at IQSS and provides a guide for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. +This guide describes the testing process used by QA at IQSS and provides a reference for others filling in for that role. Please note that many variations are possible, and the main thing is to catch bugs and provide a good quality product to the user community. ## Workflow -The basic workflow is bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub Kanban board. 
As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common develop branch and ultimately released as part of the product. Before a pull request is merged it must be reviewed by a member of the development team from a coding perspective, it must pass automated integration tests before moving to QA. There it is tested manually, exercising the UI using three common browser types and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions are tested. Once this passes and any bugs that are found are corrected, the automated integration tests are confirmed to be passing, the PR is merged into development, the PR is closed, and the branch is deleted. At this point, the pr moves from the QA column automatically into the Done column and the process repeats with the next pr until it is decided to make a release. +The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. ## Release Cadence and Sprints @@ -20,13 +20,10 @@ The decision to make a release can be based on the time since the last release, ## Performance Testing and Deployment -The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named Locust, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. 
Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds (I believe), it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. +The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. -Once the performance has been tested and recorded in a Google spreadsheet for this proposed version, the release will be prepared and posted. +Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. -Preparing the release consists of writing and reviewing the release notes compiled from individual notes in PRs that have been merged for this release. A PR is made for the notes and merged. Next, increment the version numbers in certain code files, produce a PR with those changes, and merge that into the common development branch. Last, a PR is made to merge and develop into the master branch. Once that is merged a guide build with the new release version is made from the master branch. Last, a release war file is built from the master and an installer is built from the master branch and includes the newly built war file. - -Publishing the release consists of creating a new draft release on GitHub, posting the release notes, uploading the .war file and the installer .zip file, and any ancillary files used to configure this release. The latest link for the guides should be updated on the guides server to point to the newest version. Once that is all in place, specify the version name and the master branch at the top of the GitHub draft release and publish. This will tag the master branch with the version number and make the release notes and files available to the public. - -Once released, post to Dataverse general about the release and when possible, deploy to demo and production. +## Making a Release +See {doc}`/developers/making-releases` in the Developer Guide. 
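To make the load profile described above concrete, here is a minimal Locust sketch; it is not the actual script used for these runs, and the host, the dataset PIDs, and the exact parameters are assumptions/placeholders to be adjusted to the instance under test:

```python
import random

from locust import HttpUser, between, task

# Hypothetical PIDs; in practice use ~10 published datasets with varying file counts.
DATASET_PIDS = [
    "doi:10.5072/FK2/EXAMPLE1",
    "doi:10.5072/FK2/EXAMPLE2",
]


class GuestUser(HttpUser):
    # Assumption: pointing at a test instance, not production.
    host = "https://dataverse-internal.iq.harvard.edu"
    # Random think time between requests, in seconds (the 5-20 seconds mentioned above).
    wait_time = between(5, 20)

    @task(1)
    def homepage(self):
        # ~50% of requests hit the homepage.
        self.client.get("/")

    @task(1)
    def dataset_page(self):
        # ~50% of requests hit a dataset page, chosen at random from the pool.
        pid = random.choice(DATASET_PIDS)
        self.client.get(
            "/dataset.xhtml",
            params={"persistentId": pid},
            name="/dataset.xhtml?persistentId=[pid]",
        )
```

A headless run such as `locust -f locustfile.py --headless -u 250 -r 5` would ramp up toward the 250-user level mentioned above.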
diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index 7075d7f1776..a5981dcfbe9 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -10,7 +10,7 @@ To run performance tests, we have a performance test cluster on AWS that employs ## Access -Access to performance cluster instances requires ssh keys, see Leonid. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. +Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. ## Special Notes âš ï¸ @@ -19,6 +19,4 @@ Please note the performance database is also used occasionally by Julian and the Executing the Performance Script -------------------------------- -To execute the performance test script, you need to install a local copy of the database-helper-scripts project (https://github.com/IQSS/dataverse-helper-scripts), written by Raman. I have since produced a stripped-down script that calls just the DB and ds and works with python3. - -The automated integration test runs happen on each commit to a PR on an AWS instance and should be reviewed to be passing before merging into development. Their status can be seen on the PR page near the bottom, above the merge button. See Don Sizemore or Phil for questions. +To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the DB and ds and works with python3. diff --git a/doc/sphinx-guides/source/qa/test-automation-integration.md b/doc/sphinx-guides/source/qa/test-automation-integration.md deleted file mode 100644 index 5e9d00cd461..00000000000 --- a/doc/sphinx-guides/source/qa/test-automation-integration.md +++ /dev/null @@ -1,35 +0,0 @@ -# Test automation and integration test - -```{contents} -:depth: 3 -``` - -This test suite is added to and maintained by development. It is generally advisable for code contributors to add integration tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base’s code paths as possible, every time to catch bugs. - -This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. - -## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal: - - -1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. - -2. From the pull request page, click the copy icon next to the pull request branch name. - -3. Log on to jenkins.dataverse.org, select the IQSS_Dataverse_Internal project, and configure the repository URL and branch specifier to match the ones from the pull request. 
For example: - - * 8372-gdcc-xoai-library has IQSS implied - - **Repository URL:** https://github.com/IQSS/dataverse.git - - **Branch specifier:** */8372-gdcc-xoai-library - * GlobalDataverseCommunityConsortium:GDCC/DC-3B - - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - - **Branch specifier:** */GDCC/DC-3B. - -4. Click Build Now and note the build number in progress. - -5. Once complete, go to https://dataverse-internal.iq.harvard.edu and check that the deployment succeeded, and that the homepage displays the latest build number. - -6. If for some reason it didn’t deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (su - dataverse, /usr/local/payara5/bin/asadmin list-applications, /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1, /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war) - -7. If that didn’t work, you may have run into a flyway DB script collision error but that should be indicated by the server.log - -8. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to tail -F server.log in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md new file mode 100644 index 00000000000..ba8e5296d47 --- /dev/null +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -0,0 +1,35 @@ +# Test Automation + +```{contents} +:depth: 3 +``` + +The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. + +This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. + +## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal + + +1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. + +1. From the pull request page, click the copy icon next to the pull request branch name. + +1. Log on to <https://jenkins.dataverse.org>, select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: + + * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. + +1. Click "Build Now" and note the build number in progress. + +1. Once complete, go to <https://dataverse-internal.iq.harvard.edu> and check that the deployment succeeded, and that the homepage displays the latest build number. + +1. If for some reason it didn’t deploy, check the server.log file.
It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara5/bin/asadmin list-applications; /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1; /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war`) + +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. + +1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index fb66bc4d099..45b3b360ac7 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -6,10 +6,14 @@ ## Dataverse Internal -To build and test a PR, we use a build named IQSS_Dataverse_Internal on jenkins.dataverse.org, which deploys the .war file to an AWS instance named dataverse-internal.iq.harvard.edu. -Login to Jenkins requires a username and password. Check with Don Sizemore. Login to the dataverse-internal server requires a key, see Leonid. +To build and test a PR, we use a build named `IQSS_Dataverse_Internal` on <https://jenkins.dataverse.org>, which deploys the .war file to an AWS instance named <https://dataverse-internal.iq.harvard.edu>. ## Guides Server -There is also a guides build project named guides.dataverse.org. Any test builds of guides are deployed to a named directory** on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. -Login to the guides server requires a key, see Don Sizemore. +There is also a guides build project named `guides.dataverse.org`. Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. + +Note that changes to guides can also be previewed on Read the Docs. In the pull request, look for a link like . This Read the Docs preview is also mentioned under {doc}`/developers/documentation`. + +## Other Servers + +We can spin up additional AWS EC2 instances as needed. See {doc}`/developers/deployment` in the Developer Guide.
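As a scripted alternative to eyeballing the homepage for the "check that the deployment succeeded" step above, here is a minimal sketch that asks the instance for its version and build via the standard Dataverse `/api/info/version` endpoint; the host below is an assumption and should point at whatever instance Jenkins deployed to:

```python
import requests

# Assumption: the instance Jenkins just deployed to.
HOST = "https://dataverse-internal.iq.harvard.edu"

# /api/info/version is the standard Dataverse info API for the running version/build.
resp = requests.get(f"{HOST}/api/info/version", timeout=30)
resp.raise_for_status()
data = resp.json()["data"]
# The "build" field may or may not be populated, depending on how the war was built.
print("Deployed version:", data.get("version"), data.get("build", ""))
```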
From 7650eb308ed5cb8805981e77b252ceb2e3c760c2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 13 Nov 2023 16:35:25 -0500 Subject: [PATCH 164/546] Removes the title from content and add label --- doc/sphinx-guides/source/qa/manual-testing.md | 3 ++- doc/sphinx-guides/source/qa/other-approaches.md | 3 ++- doc/sphinx-guides/source/qa/overview.md | 3 ++- doc/sphinx-guides/source/qa/performance-tests.md | 3 ++- doc/sphinx-guides/source/qa/test-automation.md | 3 ++- doc/sphinx-guides/source/qa/testing-infrastructure.md | 3 ++- 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/manual-testing.md index 9f365aae59f..580e5153394 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/manual-testing.md @@ -1,6 +1,7 @@ # Manual Testing Approach -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` ## Introduction diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/other-approaches.md index cf679c3f442..2e2ef906191 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/other-approaches.md @@ -1,6 +1,7 @@ # Other Approaches to Deploying and Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index d3364fbbbf9..c4f66446ca3 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -1,6 +1,7 @@ # Overview -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index a5981dcfbe9..f433226d4ff 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -1,6 +1,7 @@ # Performance Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index ba8e5296d47..c2b649df498 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -1,6 +1,7 @@ # Test Automation -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index 45b3b360ac7..7a4bda626fc 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -1,6 +1,7 @@ # Infrastructure for Testing -```{contents} +```{contents} Contents: +:local: :depth: 3 ``` From 75789e0f94d36fce1270b0714bd5e516f356d8ee Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 13 Nov 2023 19:06:26 -0500 Subject: [PATCH 165/546] current state of the flyway script (work in progress/likely to change) #8549 --- .../V6.0.0.3__8549-collection-quotas.sql | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql diff --git a/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql new file mode 100644 index 00000000000..f74d9bebe30 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__8549-collection-quotas.sql @@ -0,0 +1,70 @@ +-- Storage size column added: +ALTER TABLE dvobject 
ADD COLUMN IF NOT EXISTS storagesize BIGINT; + +-- (work in progress! the table structure may change/the column may be moved out into +-- its own table. but the mechanics of the recursion are working) + +-- The somewhat convoluted queries below populate the storage sizes for the entire +-- DvObject tree, fast. It IS possible to do it all with one recursive PostgreSQL +-- query that will crawl the tree from the leaves (DataFiles) up and add up the +-- sizes for all the Datasets/Collections above. Unfortunately, that takes some hours +-- on a database the size of the one at IQSS. So what we are doing instead is to compute +-- the total sizes of all the *directly* linked objects, with 3 linear queries. This +-- will correctly calculate the sizes of all the Datasets (since they can only +-- contain DataFiles, directly, without any extra hierarchy possible) and those +-- Collections that only contain Datasets; but not the sizes of Collections that +-- have sub-collections. To take any sub-collections into account we are then running +-- a recursive query - but then we only need to run it on the tree of Collections, +-- which should make it manageably fast on any real life instance. + +UPDATE dvobject SET storagesize=0; +-- For datafiles, the storage size = main file size by default: +-- (we are excluding any harvested files) +UPDATE dvobject SET storagesize=COALESCE(f.filesize,0) FROM datafile f, dataset d WHERE f.id = dvobject.id AND dvobject.owner_id = d.id AND d.harvestingclient_id IS null; +-- ... but for ingested tabular files the size of the saved original needs to be added, since +-- those also take space: +-- (should be safe to assume that there are no *harvested ingested* files) +UPDATE dvobject SET storagesize=dvobject.storagesize + COALESCE(datatable.originalFileSize,0) FROM datatable WHERE datatable.datafile_id = dvobject.id; +-- Now we can calculate storage sizes of each individual dataset (a simple sum +-- of the storage sizes of all the files in the dataset): +-- (excluding the harvested datasets; this is less important, since there should be +-- significantly fewer datasets than files, but might as well) +UPDATE dvobject SET storagesize=o.combinedStorageSize +FROM (SELECT datasetobject.id, SUM(fileobject.storagesize) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject +WHERE fileobject.owner_id = datasetobject.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; +-- ...
and then we can repeat the same for collections, by setting the storage size +-- to the sum of the storage sizes of the datasets *directly* in each collection: +-- (no attempt is made yet to recursively count the sizes of all the child sub-collections) +UPDATE dvobject SET storagesize=o.combinedStorageSize +FROM (SELECT collectionobject.id, SUM(datasetobject.storagesize) AS combinedStorageSize +FROM dvobject datasetobject, dvobject collectionobject +WHERE datasetobject.owner_id = collectionobject.id +AND datasetobject.storagesize IS NOT null +GROUP BY collectionobject.id) o WHERE o.id = dvobject.id AND dvobject.dtype='Dataverse'; + +-- And now we will update the storage sizes of all the Collection ("Dataverse") objects +-- that contain sub-collections, *recursively*, to add their sizes to the totals: +WITH RECURSIVE treestorage (id, owner_id, storagesize, dtype) AS +( + -- All dataverses: + SELECT id, owner_id, storagesize, dtype + FROM dvobject + WHERE dtype = 'Dataverse' + + UNION + + -- Recursive Member: + SELECT dvobject.id, treestorage.owner_id, dvobject.storagesize, treestorage.dtype + FROM treestorage, dvobject + WHERE treestorage.id = dvobject.owner_id + AND dvobject.dtype = 'Dataverse' +) + +UPDATE dvobject SET storagesize=storagesize+(SELECT COALESCE(SUM(storagesize),0) +FROM treestorage WHERE owner_id=dvobject.id) +--FROM treestorage ts +--WHERE ts.owner_id=dvobject.id +WHERE dvobject.dtype = 'Dataverse' +AND dvobject.id IN (SELECT owner_id FROM treestorage WHERE owner_id IS NOT null); From c49036bf3d67d22cec384a8fe4f7cb23ed3d9a46 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:06:43 +0000 Subject: [PATCH 166/546] Added: includeDeaccessioned support to getDatasetVersionCitation API endpoint --- .../harvard/iq/dataverse/api/Datasets.java | 9 ++++++-- .../harvard/iq/dataverse/api/DatasetsIT.java | 21 ++++++++++++++++++- .../edu/harvard/iq/dataverse/api/UtilIT.java | 3 ++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 292aba0cee3..68c618b0f1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3995,9 +3995,14 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken" @GET @AuthRequired @Path("{id}/versions/{versionId}/citation") - public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { return response(req -> ok( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned).getCitation(true, false)), getRequestUser(crc)); } @POST diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 56bf53c1c99..d20f1e8a58b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++
b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3371,13 +3371,32 @@ public void getDatasetVersionCitation() { createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); - Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, apiToken); + Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, false, apiToken); getDatasetVersionCitationResponse.prettyPrint(); getDatasetVersionCitationResponse.then().assertThat() .statusCode(OK.getStatusCode()) // We check that the returned message contains information expected for the citation string .body("data.message", containsString("DRAFT VERSION")); + + // Test Deaccessioned + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // includeDeaccessioned false + Response getDatasetVersionCitationNotDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiToken); + getDatasetVersionCitationNotDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // includeDeaccessioned true + Response getDatasetVersionCitationDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken); + getDatasetVersionCitationDeaccessioned.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", containsString("DEACCESSIONED VERSION")); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..2336bf8beb8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3345,10 +3345,11 @@ static Response getPrivateUrlDatasetVersionCitation(String privateUrlToken) { return response; } - static Response getDatasetVersionCitation(Integer datasetId, String version, String apiToken) { + static Response getDatasetVersionCitation(Integer datasetId, String version, boolean includeDeaccessioned, String apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json") + .queryParam("includeDeaccessioned", includeDeaccessioned) .get("/api/datasets/" + datasetId + "/versions/" + version + "/citation"); return response; } From 75ff2fbad275a4543525ac0dc62f65d3eaa0e5c1 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:10:14 +0000 Subject: [PATCH 167/546] Added: API docs for #10104 --- doc/sphinx-guides/source/api/native-api.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..2e3a0b2af08 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2502,6 +2502,16 @@ Get Citation curl -H "Accept:application/json" 
"$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER" +By default, deaccessioned dataset versions are not included in the search when applying the :latest or :latest-published identifiers. Additionally, when filtering by a specific version tag, you will get a "not found" error if the version is deaccessioned and you do not enable the ``includeDeaccessioned`` option described below. + +If you want to include deaccessioned dataset versions, you must set ``includeDeaccessioned`` query parameter to ``true``. + +Usage example: + +.. code-block:: bash + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/{version}/citation?persistentId=$PERSISTENT_IDENTIFIER&includeDeaccessioned=true" + Get Citation by Private URL Token ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From be631af6e5fd5dd181aebdb0ee8a2dd1da3ff789 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 15 Nov 2023 12:12:31 +0000 Subject: [PATCH 168/546] Added: release notes for #10104 --- doc/release-notes/10104-dataset-citation-deaccessioned.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md new file mode 100644 index 00000000000..0ba06d729c4 --- /dev/null +++ b/doc/release-notes/10104-dataset-citation-deaccessioned.md @@ -0,0 +1 @@ +The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. From 2fb81f6b5e1a5c735b937600b0dd74ee47d236a1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 15 Nov 2023 10:01:52 -0500 Subject: [PATCH 169/546] altering circuit breakers for qa --- conf/solr/9.3.0/solrconfig.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml index b89315cdaa9..9705faa7009 100644 --- a/conf/solr/9.3.0/solrconfig.xml +++ b/conf/solr/9.3.0/solrconfig.xml @@ -588,10 +588,10 @@ check for "Circuit Breakers tripped" in logs and the corresponding error message should tell you what transpired (if the failure was caused by tripped circuit breakers). 
--> - + 5 + - + 5 + - + + - + + From 74d36b64d0fc36afafa5382952050239737ebe1a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 16 Nov 2023 11:24:30 -0500 Subject: [PATCH 171/546] #9686 preliminary check in --- .../java/edu/harvard/iq/dataverse/Dataset.java | 14 +------------- .../java/edu/harvard/iq/dataverse/DvObject.java | 17 +++++++++++++++++ .../V6.0.0.3__9686-move-harvestingclient-id.sql | 8 ++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) create mode 100644 src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 245bdf0efd2..ad72ada20e9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -742,21 +742,9 @@ public void setDatasetExternalCitations(List datasetEx this.datasetExternalCitations = datasetExternalCitations; } - @ManyToOne - @JoinColumn(name="harvestingClient_id") - private HarvestingClient harvestedFrom; - - public HarvestingClient getHarvestedFrom() { - return this.harvestedFrom; - } - public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { - this.harvestedFrom = harvestingClientConfig; - } - public boolean isHarvested() { - return this.harvestedFrom != null; - } + private String harvestIdentifier; diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..16237203d78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import java.sql.Timestamp; @@ -351,6 +352,22 @@ public GlobalId getGlobalId() { return globalId; } + @ManyToOne + @JoinColumn(name="harvestingClient_id") + private HarvestingClient harvestedFrom; + + public HarvestingClient getHarvestedFrom() { + return this.harvestedFrom; + } + + public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { + this.harvestedFrom = harvestingClientConfig; + } + + public boolean isHarvested() { + return this.harvestedFrom != null; + } + public abstract T accept(Visitor v); @Override diff --git a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql new file mode 100644 index 00000000000..23d66701b99 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql @@ -0,0 +1,8 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; + +update dvobject dvo set harvestingclient_id = s.harvestingclient_id from +(select id, harvestingclient_id from dataset d) s +where s.id = dvo.id; + +--ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; + From 5c045120d6660ee0b07501cadfb06aaf9f083f6b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 16 Nov 2023 13:42:51 -0500 Subject: [PATCH 172/546] #9686 rename migration script --- ...lient-id.sql => V6.0.0.4__9686-move-harvestingclient-id.sql} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename src/main/resources/db/migration/{V6.0.0.3__9686-move-harvestingclient-id.sql => V6.0.0.4__9686-move-harvestingclient-id.sql} (72%) diff --git 
a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql similarity index 72% rename from src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql index 23d66701b99..0e4c9a58a93 100644 --- a/src/main/resources/db/migration/V6.0.0.3__9686-move-harvestingclient-id.sql +++ b/src/main/resources/db/migration/V6.0.0.4__9686-move-harvestingclient-id.sql @@ -1,7 +1,7 @@ ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d) s +(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s where s.id = dvo.id; --ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; From a376b4e3f4bacc8dc651b7048d9a323535dc92f7 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 10:01:33 -0500 Subject: [PATCH 173/546] Add condition for 401 when a invalid key is provided and create changelog on API Guide --- doc/sphinx-guides/source/api/changelog.rst | 13 +++++++++++++ doc/sphinx-guides/source/api/index.rst | 1 + .../java/edu/harvard/iq/dataverse/api/AccessIT.java | 11 ++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 doc/sphinx-guides/source/api/changelog.rst diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst new file mode 100644 index 00000000000..b78d268db33 --- /dev/null +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -0,0 +1,13 @@ +API Changelog +============= + +.. contents:: |toctitle| + :local: + :depth: 1 + +6.0.0 +----- + +Changes +~~~~~~~ + - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. diff --git a/doc/sphinx-guides/source/api/index.rst b/doc/sphinx-guides/source/api/index.rst index c9e79098546..dd195aa9d62 100755 --- a/doc/sphinx-guides/source/api/index.rst +++ b/doc/sphinx-guides/source/api/index.rst @@ -24,3 +24,4 @@ API Guide linkeddatanotification apps faq + changelog \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 42e21e53101..d08f916243f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -198,6 +198,8 @@ public void testDownloadSingleFile() { //Not logged in non-restricted Response anonDownloadOriginal = UtilIT.downloadFileOriginal(tabFile1Id); Response anonDownloadConverted = UtilIT.downloadFile(tabFile1Id); + Response anonDownloadConvertedNullKey = UtilIT.downloadFile(tabFile1Id, null); + // ... and download the same tabular data file, but without the variable name header added: Response anonDownloadTabularNoHeader = UtilIT.downloadTabularFileNoVarHeader(tabFile1Id); // ... 
and download the same tabular file, this time requesting the "format=tab" explicitly: @@ -206,6 +208,8 @@ public void testDownloadSingleFile() { assertEquals(OK.getStatusCode(), anonDownloadConverted.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularNoHeader.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularWithFormatName.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), anonDownloadConvertedNullKey.getStatusCode()); + int origSizeAnon = anonDownloadOriginal.getBody().asByteArray().length; int convertSizeAnon = anonDownloadConverted.getBody().asByteArray().length; int tabularSizeNoVarHeader = anonDownloadTabularNoHeader.getBody().asByteArray().length; @@ -423,10 +427,7 @@ private HashMap readZipResponse(InputStream iStrea } String name = entry.getName(); -// String s = String.format("Entry: %s len %d added %TD", -// entry.getName(), entry.getSize(), -// new Date(entry.getTime())); -// System.out.println(s); + // Once we get the entry from the zStream, the zStream is // positioned read to read the raw data, and we keep @@ -466,7 +467,7 @@ private HashMap readZipResponse(InputStream iStrea @Test public void testRequestAccess() throws InterruptedException { - + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); createDatasetResponse.prettyPrint(); From 63725d75c115352ff9d0bb94f2e5b6b4d7ca5d05 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:07:17 -0500 Subject: [PATCH 174/546] remove cruft: mdc logs #9115 --- mdc-logs/raw-mdc-2019-01-07.log | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 mdc-logs/raw-mdc-2019-01-07.log diff --git a/mdc-logs/raw-mdc-2019-01-07.log b/mdc-logs/raw-mdc-2019-01-07.log deleted file mode 100644 index d7a6386160e..00000000000 --- a/mdc-logs/raw-mdc-2019-01-07.log +++ /dev/null @@ -1,6 +0,0 @@ -#Fields: event_time client_ip session_cookie_id user_cookie_id user_id request_url identifier filename size user-agent title publisher publisher_id authors publication_date version other_id target_url publication_year -2019-01-07T15:14:51-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:15:15-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:04-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV - - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:14-0500 0:0:0:0:0:0:0:1 
9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bae7c-2c5bbc1a9c8c 1 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 -2019-01-07T15:16:19-0500 0:0:0:0:0:0:0:1 9f4209d3c177d3cb77f4d06cf3ba - :guest http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV doi:10.5072/FK2/XTT5BV 168298bb8ce-337d8df49763 4026 Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 Dataset One - 1 Smith, Robert| Kew, Susie 2019-01-07T18:20:54Z 1 - http://localhost:8080/dataset.xhtml?persistentId=doi:10.5072/FK2/XTT5BV 2019 From 2433114ec7b8430753bc730056a07e24ac0bb5d3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 11:20:03 -0500 Subject: [PATCH 175/546] fix bullet #10060 #10070 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index b78d268db33..a1cffd84f33 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ - - **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. From e0350e735551270f9bd23bfa226b6946282df467 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 11:38:53 -0500 Subject: [PATCH 176/546] Change 6.0.0 to 6.0 --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index a1cffd84f33..086ff4a20e5 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,7 +5,7 @@ API Changelog :local: :depth: 1 -6.0.0 +6.0 ----- Changes From 437e3b94edf89a2245310709c07d8238c0df4235 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 11:42:17 -0500 Subject: [PATCH 177/546] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 086ff4a20e5..2698ba3debf 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. 
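To illustrate the behavior change described in this changelog entry, here is a small sketch using Python requests; the server URL and file id are placeholders, and the expected status codes follow the changelog wording and the AccessIT test above:

```python
import requests

SERVER_URL = "https://demo.dataverse.org"  # placeholder; any instance with this change
FILE_ID = 42                               # placeholder id of a published, public file

# No API token at all: the public (non-restricted) file still downloads.
no_token = requests.get(f"{SERVER_URL}/api/access/datafile/{FILE_ID}")

# Invalid API token: now rejected instead of being ignored.
bad_token = requests.get(
    f"{SERVER_URL}/api/access/datafile/{FILE_ID}",
    headers={"X-Dataverse-key": "not-a-real-token"},
)

print(no_token.status_code)   # expected: 200
print(bad_token.status_code)  # expected: 401
```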
From 640f69e39f71244b9ba1d7f534180a6b4c8b58cc Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 13:19:14 -0500 Subject: [PATCH 178/546] add release note for API changelog #10060 --- doc/release-notes/10060-api-changelog.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10060-api-changelog.md diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md new file mode 100644 index 00000000000..56ac96e3564 --- /dev/null +++ b/doc/release-notes/10060-api-changelog.md @@ -0,0 +1,3 @@ +We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html + +See also #10060. From 83a66aac65db2f7634b3917d332b0e4253be3c84 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:55:58 -0500 Subject: [PATCH 179/546] Update doc/sphinx-guides/source/api/changelog.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 2698ba3debf..f518a9b542d 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API Key is provided to download a public with this API call, it will result on a ``401`` error response. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. From 70edaa789e84c99b110036c232155337afb5c459 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 17 Nov 2023 15:02:32 -0500 Subject: [PATCH 180/546] Remove "to happy " --- doc/sphinx-guides/source/api/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index f518a9b542d..d6742252d27 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -10,4 +10,4 @@ API Changelog Changes ~~~~~~~ -- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed to happy (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. +- **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. From 73593acb1bcdb9ba1d62e47310753e905b2546dd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 17 Nov 2023 15:17:28 -0500 Subject: [PATCH 181/546] #9464 query by dvo. 
update IT --- .../dataverse/metrics/MetricsServiceBean.java | 33 ++++++++++--------- .../harvard/iq/dataverse/api/MetricsIT.java | 14 +++++--- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..832dda5ced9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -212,8 +212,9 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" + - " and dataset.harvestingclient_id is null\n" + + " and dvobject.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; @@ -225,7 +226,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dataset.harvestingclient_id IS NOT null \n" + + " AND dvobject.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +245,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "JOIN dvobject ON dvobject.id = dataset.id\n") + + "JOIN dvobject ON dvobject.id = dataset.id\n" + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +259,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +277,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +305,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +328,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +351,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? 
"" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..b961a86dc0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -283,6 +283,12 @@ public void testGetDatasetsBySubject() { response = UtilIT.metricsDatasetsBySubject("dataLocation=local"); response.then().assertThat() .statusCode(OK.getStatusCode()); + + //Test ok when passing remote + response = UtilIT.metricsDatasetsBySubject("dataLocation=remote"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From d0fc9affdf52dfd60461520adb20a6c7d30e7d6b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 17 Nov 2023 15:31:50 -0500 Subject: [PATCH 182/546] refactor to avoid overloaded methods in constructors --- .../AbstractRemoteOverlayAccessIO.java | 335 ++++++++++++++++++ .../dataaccess/GlobusAccessibleStore.java | 4 +- .../dataaccess/GlobusOverlayAccessIO.java | 51 ++- .../dataaccess/RemoteOverlayAccessIO.java | 315 +--------------- .../dataaccess/RemoteOverlayAccessIOTest.java | 1 - 5 files changed, 390 insertions(+), 316 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java new file mode 100644 index 00000000000..8adaf746210 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -0,0 +1,335 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Logger; + +import 
javax.net.ssl.SSLContext; + +import org.apache.http.Header; +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.protocol.HTTP; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; + +public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { + + protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + static final String BASE_STORE = "base-store"; + protected static final String SECRET_KEY = "secret-key"; + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + protected static final String REMOTE_STORE_NAME = "remote-store-name"; + protected static final String REMOTE_STORE_URL = "remote-store-url"; + protected StorageIO baseStore = null; + protected String path = null; + protected PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; + + public static String getBaseStoreIdFor(String driverId) { + return getConfigParamForDriver(driverId, BASE_STORE); + } + + public AbstractRemoteOverlayAccessIO() { + super(); + } + + public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + } + + public AbstractRemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. + */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + protected int getUrlExpirationMinutes() { + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. 
+ connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20); + } + + @Override + abstract public long retrieveSizeFromMedia(); + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (retrieveSizeFromMedia() != -1); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + + } else if (storageLocation != null) { + // ://// + // remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if (index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? 
storageLocation.substring(0, endOfId) : storageLocation; + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = getConfigParam(REMOTE_STORE_NAME); + try { + remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); + } catch (MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + protected String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (getDvObject() instanceof Dataset) { + fullStoragePath = getDataset().getAuthorityForFileStorage() + "/" + + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (getDvObject() instanceof DataFile) { + fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + + + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index afc7556481a..ce75395c883 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -28,7 +28,7 @@ public static String getTransferPath(String driverId) { } public static JsonArray getReferenceEndpointsWithPaths(String driverId) { - String[] endpoints = StorageIO.getConfigParamForDriver(driverId, RemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); JsonArrayBuilder builder = Json.createArrayBuilder(); for(int i=0;i extends RemoteOverlayAccessIO implements GlobusAccessibleStore { +public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); /* @@ -67,11 +68,19 @@ public class 
GlobusOverlayAccessIO extends RemoteOverlayAcce public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); + configureGlobusEndpoints(); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); } public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { this.driverId = driverId; + configureGlobusEndpoints(); configureStores(null, driverId, storageLocation); if (isManaged()) { String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); @@ -83,6 +92,7 @@ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOE logger.fine("Referenced path: " + path); } } + private boolean isManaged() { if(dataverseManaged==null) { dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); @@ -146,7 +156,6 @@ private static String findMatchingEndpoint(String path, String[] allowedEndpoint return null; } - @Override protected void validatePath(String relPath) throws IOException { if (isManaged()) { if (!usesStandardNamePattern(relPath)) { @@ -363,8 +372,7 @@ public String getStorageLocation() throws IOException { * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - @Override - protected void configureEndpoints() throws IOException { + protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } @@ -435,5 +443,40 @@ public static void main(String[] args) { } } + + + @Override + public void open(DataAccessOption... 
option) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public Path getFileSystemPath() throws IOException { + // TODO Auto-generated method stub + return null; + } + + + @Override + public void savePath(Path fileSystemPath) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + // TODO Auto-generated method stub + + } + + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + // TODO Auto-generated method stub + + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 5463254140d..1616bfabf96 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,45 +11,23 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; import java.util.List; -import java.util.function.Predicate; -import java.util.logging.Logger; import org.apache.http.Header; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.Registry; -import org.apache.http.config.RegistryBuilder; -import org.apache.http.conn.socket.ConnectionSocketFactory; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HTTP; -import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.net.ssl.SSLContext; - /** * @author qqmyers */ @@ -61,40 +39,20 @@ * * baseUrl: http(s):// */ -public class RemoteOverlayAccessIO extends StorageIO { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); +public class RemoteOverlayAccessIO extends AbstractRemoteOverlayAccessIO { // A single baseUrl of the form http(s):// where this store can reference data static final String BASE_URL = "base-url"; - // Multiple endpoints where data can be referenced from. Multiple endpoints are separated by a comma. Multiple endpoints are only supported by the GlobalOverlayAccessIO at present. 
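    // Illustrative sketch only (the setting names below are an assumption, following the
    // dataverse.files.<id>.<key> pattern implied by getConfigParam()/getConfigParamForDriver()):
    // a remote overlay store registered under driver id "demo" would typically be configured with
    //   dataverse.files.demo.base-url=https://repository.example.edu
    //   dataverse.files.demo.base-store=file
    //   dataverse.files.demo.secret-key=<shared secret used for signed URLs>
    //   dataverse.files.demo.url-expiration-minutes=60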
- static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; - static final String BASE_STORE = "base-store"; - static final String SECRET_KEY = "secret-key"; - static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; - static final String REMOTE_STORE_NAME = "remote-store-name"; - static final String REMOTE_STORE_URL = "remote-store-url"; - - protected StorageIO baseStore = null; - protected String path = null; - private String baseUrl = null; - - protected static HttpClientContext localContext = HttpClientContext.create(); - protected PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - protected int timeout = 1200; - protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - protected static boolean trustCerts = false; - protected int httpConcurrency = 4; + String baseUrl = null; public RemoteOverlayAccessIO() { + super(); } public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); @@ -106,6 +64,7 @@ public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(null, driverId, storageLocation); path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); @@ -296,105 +255,12 @@ public void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. 
- */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStorageLocation = fullStorageLocation - .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } - @Override - public boolean exists() { - logger.fine("Exists called"); - return (retrieveSizeFromMedia() != -1); - } - @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( @@ -407,11 +273,6 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - @Override public boolean downloadRedirectEnabled() { String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT); @@ -443,103 +304,12 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - - protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - configureEndpoints(); - - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, - DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if (dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); - } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case 
DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - - } else if (storageLocation != null) { - // ://// - // remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if (index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) - int endOfId = storageLocation.indexOf("//"); - fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation; - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") - + "/" + fullStorageLocation; - break; - default: - logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " - + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); - throw new IOException("Not supported"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); - } - } - remoteStoreName = getConfigParam(REMOTE_STORE_NAME); - try { - remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); - } catch (MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but * the derived GlobusOverlayAccessIO can support multiple endpoints. * @throws IOException */ - protected void configureEndpoints() throws IOException { + protected void configureRemoteEndpoints() throws IOException { baseUrl = getConfigParam(BASE_URL); if (baseUrl == null) { //Will accept the first endpoint using the newer setting @@ -560,70 +330,6 @@ protected void configureEndpoints() throws IOException { } } - // Convenience method to assemble the path, starting with the DOI - // authority/identifier/, that is needed to create a base store via - // DataAccess.getDirectStorageIO - the caller has to add the store type specific - // prefix required. 
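    // Hypothetical example of the path assembly described above (identifiers are made up for
    // illustration): a DataFile in dataset doi:10.5072/FK2/ABCDEF stored as
    // "demo://18b39722140-50eb7d3c5ece//remote/path" resolves to
    // "10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece"; configureStores() then prepends the
    // base-store-specific prefix (S3 bucket or file-store directory) before calling
    // DataAccess.getDirectStorageIO().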
- protected String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if (driverIndex >= 0) { - fullStoragePath = fullStoragePath - .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if (suffixIndex >= 0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
httpConcurrency : 20); - } - @Override public void savePath(Path fileSystemPath) throws IOException { throw new UnsupportedDataAccessOperationException( @@ -660,13 +366,4 @@ static boolean isValidIdentifier(String driverId, String storageId) { } return true; } - - public static String getBaseStoreIdFor(String driverId) { - return getConfigParamForDriver(driverId, BASE_STORE); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 5affc01aff0..1c371881ba6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -8,7 +8,6 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.mocks.MocksFactory; import edu.harvard.iq.dataverse.util.UrlSignerUtil; From 2500bccc5fa438bf2dff4e5aa887e816099a51e3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 14:04:22 -0500 Subject: [PATCH 183/546] assert current /bag-info.txt behavior #8760 Also, add a superuser-only API for downloading files (such as bags) from the file system so we can make assertions about them in our tests. --- .../iq/dataverse/api/AbstractApiBean.java | 7 ++ .../edu/harvard/iq/dataverse/api/Admin.java | 25 ++++- .../edu/harvard/iq/dataverse/api/BagIT.java | 101 +++++++++++++++++- 3 files changed, 128 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 027f9e0fcb1..58565bcc9d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -45,11 +45,13 @@ import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; +import java.io.InputStream; import java.net.URI; import java.util.Arrays; import java.util.Collections; @@ -726,6 +728,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename) return res.build(); } + protected Response ok(InputStream inputStream) { + ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT)); + return res.build(); + } + protected Response created( String uri, JsonObjectBuilder bld ) { return Response.created( URI.create(uri) ) .entity( Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..684ed32dff8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -107,6 +107,7 @@ import 
edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -2425,5 +2426,27 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); } - + + /** + * For testing only. Download a file from the file system. + */ + @GET + @AuthRequired + @Path("/localfile") + public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + try { + return ok(new FileInputStream(pathToFile)); + } catch (IOException ex) { + return error(Status.BAD_REQUEST, ex.toString()); + } + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index e7210bc45a9..fae9cf95156 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -1,17 +1,32 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.response.Response; import edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.response.Response; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.Enumeration; +import java.util.Scanner; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class BagIT { + static String bagitExportDir = "/tmp"; + @BeforeAll public static void setUpClass() { @@ -25,14 +40,14 @@ public static void setUpClass() { setArchiverSettings.then().assertThat() .statusCode(OK.getStatusCode()); - Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", "/tmp"); + Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", bagitExportDir); setBagItLocalPath.then().assertThat() .statusCode(OK.getStatusCode()); } @Test - public void testBagItExport() { + public void testBagItExport() throws IOException { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); @@ -63,6 +78,78 @@ public void testBagItExport() { archiveDataset.prettyPrint(); archiveDataset.then().assertThat().statusCode(OK.getStatusCode()); + // spaceName comes from LocalSubmitToArchiveCommand + String spaceName = datasetPid.replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + // spacename: doi-10-5072-fk2-fosg5q + + String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip"; + + try { + // give the bag time to generate + Thread.sleep(3000); + } catch (InterruptedException ex) { + } + + // A bag could look like this: + 
//doi-10-5072-FK2-DKUTDUv-1-0/data/ + //doi-10-5072-FK2-DKUTDUv-1-0/data/Darwin's Finches/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/pid-mapping.txt + //doi-10-5072-FK2-DKUTDUv-1-0/manifest-md5.txt + //doi-10-5072-FK2-DKUTDUv-1-0/bagit.txt + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/oai-ore.jsonld + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/datacite.xml + //doi-10-5072-FK2-DKUTDUv-1-0/bag-info.txt + // --- + // bag-info.txt could look like this: + //Contact-Name: Finch, Fiona + //Contact-Email: finch@mailinator.com + //Source-Organization: Dataverse Installation () + //Organization-Address: + //Organization-Email: + //External-Description: Darwin's finches (also known as the Galápagos finches) are a group of about + // fifteen species of passerine birds. + //Bagging-Date: 2023-11-14 + //External-Identifier: https://doi.org/10.5072/FK2/LZIGBC + //Bag-Size: 0 bytes + //Payload-Oxum: 0.0 + //Internal-Sender-Identifier: Root:Darwin's Finches + Response downloadBag = downloadLocalFile(pathToZip, apiToken); + downloadBag.then().assertThat().statusCode(OK.getStatusCode()); + Path outputPath = Paths.get("/tmp/foo.zip"); + java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); + + ZipFile zipFile = new ZipFile(outputPath.toString()); + Enumeration entries = zipFile.entries(); + String sourceOrg = null; + String orgAddress = null; + String orgEmail = null; + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + String name = entry.getName(); + System.out.println("name: " + name); + if (name.endsWith("bag-info.txt")) { + InputStream stream = zipFile.getInputStream(entry); + Scanner s = new Scanner(stream).useDelimiter("\\A"); + String result = s.hasNext() ? 
s.next() : ""; + System.out.println("result: " + result); + String[] lines = result.split("\n"); + for (String line : lines) { + if (line.startsWith("Source-Organization")) { + sourceOrg = line; + } else if (line.startsWith("Organization-Address")) { + orgAddress = line; + } else if (line.startsWith("Organization-Email")) { + orgEmail = line; + } else { + } + } + } + } + assertEquals("Source-Organization: Dataverse Installation ()", sourceOrg.trim()); + assertEquals("Organization-Address: ", orgAddress.trim()); + assertEquals("Organization-Email: ", orgEmail.trim()); } @AfterAll @@ -75,4 +162,10 @@ public static void tearDownClass() { } + static Response downloadLocalFile(String pathToFile, String apiToken) { + return given() + .header("X-Dataverse-key", apiToken) + .get("/api/admin/localfile?pathToFile=" + pathToFile); + } + } From 7240e870d35fda4ec96a4ee0e0b488a9c4fc3d4f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Nov 2023 16:03:15 -0500 Subject: [PATCH 184/546] configurable BagIt source org name, address, email #8760 These values were used while testing: DATAVERSE_BAGIT_SOURCEORG_NAME=LibraScholar DATAVERSE_BAGIT_SOURCEORG_ADDRESS=123 Wisdom Way\nCambridge, MA\nUSA DATAVERSE_BAGIT_SOURCEORG_EMAIL=hello@dataverse.librascholar.edu --- .../iq/dataverse/settings/JvmSettings.java | 7 +++++++ .../iq/dataverse/util/bagit/BagGenerator.java | 15 ++++++++++----- src/main/java/propertyFiles/Bundle.properties | 4 ---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..2f59350906c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,13 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // BAGIT SETTINGS + SCOPE_BAGIT(PREFIX, "bagit"), + SCOPE_BAGIT_SOURCEORG(SCOPE_BAGIT, "sourceorg"), + BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), + BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), + BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index baba1a0cb43..b7c44014b80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -74,7 +74,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import java.util.Optional; public class BagGenerator { @@ -822,17 +824,20 @@ private String generateInfoFile() { logger.warning("No contact info available for BagIt Info file"); } - info.append("Source-Organization: " + BundleUtil.getStringFromBundle("bagit.sourceOrganization")); + String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation ()"); + String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse(""); + String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse(""); + + 
info.append("Source-Organization: " + orgName); // ToDo - make configurable info.append(CRLF); - info.append("Organization-Address: " + WordUtils.wrap( - BundleUtil.getStringFromBundle("bagit.sourceOrganizationAddress"), 78, CRLF + " ", true)); + info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true)); + info.append(CRLF); // Not a BagIt standard name - info.append( - "Organization-Email: " + BundleUtil.getStringFromBundle("bagit.sourceOrganizationEmail")); + info.append("Organization-Email: " + orgEmail); info.append(CRLF); info.append("External-Description: "); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 79887f7e76c..972e5e35601 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2379,10 +2379,6 @@ api.prov.error.freeformMissingJsonKey=The JSON object you send must have a key c api.prov.error.freeformNoText=No provenance free form text available for this file. api.prov.error.noDataFileFound=Could not find a file based on ID. -bagit.sourceOrganization=Dataverse Installation () -bagit.sourceOrganizationAddress= -bagit.sourceOrganizationEmail= - bagit.checksum.validation.error=Invalid checksum for file "{0}". Manifest checksum={2}, calculated checksum={3}, type={1} bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" From b2c62510e71e6436c2905796b9cc6a24a04b35d0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 14:06:06 -0500 Subject: [PATCH 185/546] add docs and release note for bag-info.txt config #8760 --- doc/release-notes/8760-bagit.md | 15 ++++++ .../source/installation/config.rst | 46 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md new file mode 100644 index 00000000000..30601857309 --- /dev/null +++ b/doc/release-notes/8760-bagit.md @@ -0,0 +1,15 @@ +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. + +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 13a7367de44..df311fcdaca 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1608,6 +1608,25 @@ The workflow id returned in this call (or available by doing a GET of /api/admin Once these steps are taken, new publication requests will automatically trigger submission of an archival copy to the specified archiver, Chronopolis' DuraCloud component in this example. For Chronopolis, as when using the API, it is currently the admin's responsibility to snap-shot the DuraCloud space and monitor the result. Failure of the workflow, (e.g. 
if DuraCloud is unavailable, the configuration is wrong, or the space for this dataset already exists due to a prior publication action or use of the API), will create a failure message but will not affect publication itself. +.. _bag-info.txt: + +Configuring bag-info.txt +++++++++++++++++++++++++ + +Out of the box, placeholder values like below will be placed in bag-info.txt: + +.. code-block:: text + + Source-Organization: Dataverse Installation () + Organization-Address: + Organization-Email: + +To customize these values for your institution, use the following JVM options: + +- :ref:`dataverse.bagit.sourceorg.name` +- :ref:`dataverse.bagit.sourceorg.address` +- :ref:`dataverse.bagit.sourceorg.email` + Going Live: Launching Your Production Deployment ------------------------------------------------ @@ -2506,6 +2525,33 @@ See also :ref:`guestbook-at-request-api` in the API Guide, and . Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_FILES_GUESTBOOK_AT_REQUEST``. +.. _dataverse.bagit.sourceorg.name: + +dataverse.bagit.sourceorg.name +++++++++++++++++++++++++++++++ + +The name for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_NAME``. + +.. _dataverse.bagit.sourceorg.address: + +dataverse.bagit.sourceorg.address ++++++++++++++++++++++++++++++++++ + +The mailing address for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. The example in https://datatracker.ietf.org/doc/html/rfc8493 uses commas as separators: ``1 Main St., Cupertino, California, 11111``. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_ADDRESS``. + +.. _dataverse.bagit.sourceorg.email: + +dataverse.bagit.sourceorg.email ++++++++++++++++++++++++++++++++ + +The email for your institution that you'd like to appear in bag-info.txt. See :ref:`bag-info.txt`. + +Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. + .. _feature-flags: Feature Flags From fa6f850b28e8dea1dd2dff542814e29fd7865153 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Nov 2023 16:07:30 -0500 Subject: [PATCH 186/546] limit to downloading from /tmp, add docs #8760 --- doc/release-notes/8760-download-tmp-file.md | 3 +++ doc/sphinx-guides/source/api/changelog.rst | 7 +++++ doc/sphinx-guides/source/api/native-api.rst | 10 +++++++ .../edu/harvard/iq/dataverse/api/Admin.java | 13 +++++++--- .../edu/harvard/iq/dataverse/api/AdminIT.java | 26 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/BagIT.java | 10 ++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 7 +++++ 7 files changed, 64 insertions(+), 12 deletions(-) create mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md new file mode 100644 index 00000000000..7623a91ac9a --- /dev/null +++ b/doc/release-notes/8760-download-tmp-file.md @@ -0,0 +1,3 @@ +A new API has been added for testing purposes that allows files to be downloaded from /tmp. 
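A rough usage sketch (superuser only; `SERVER_URL` and `API_TOKEN` are placeholders, not values shipped with this change):

    curl -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt"

Requests for paths outside /tmp are rejected with a 400 error.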
+ +See diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d6742252d27..7d6545999ca 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,6 +5,13 @@ API Changelog :local: :depth: 1 +6.1 +--- + +New +~~~ +- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. + 6.0 ----- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1992390410c..5b1e7410a4f 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -5349,6 +5349,16 @@ A curl example using an ``ID`` Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. +.. _download-file-from-tmp: + +Download File from /tmp +~~~~~~~~~~~~~~~~~~~~~~~ + +As a superuser:: + + GET /api/admin/downloadTmpFile?fullyQualifiedPathToFile=/tmp/foo.txt + +Note that this API is probably only useful for testing. MyData ------ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 684ed32dff8..4da1962853a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -123,6 +123,7 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import java.nio.file.Paths; /** * Where the secure, setup API calls live. @@ -2428,12 +2429,12 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } /** - * For testing only. Download a file from the file system. + * For testing only. Download a file from /tmp. */ @GET @AuthRequired - @Path("/localfile") - public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam("pathToFile") String pathToFile) { + @Path("/downloadTmpFile") + public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) { try { AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); if (!user.isSuperuser()) { @@ -2442,8 +2443,12 @@ public Response getLocalFile(@Context ContainerRequestContext crc, @QueryParam(" } catch (WrappedResponse wr) { return wr.getResponse(); } + java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize(); + if (!normalizedPath.toString().startsWith("/tmp")) { + return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'."); + } try { - return ok(new FileInputStream(pathToFile)); + return ok(new FileInputStream(fullyQualifiedPathToFile)); } catch (IOException ex) { return error(Status.BAD_REQUEST, ex.toString()); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..91ba67b10ff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -854,6 +854,32 @@ public void testBannerMessages(){ } + /** + * For a successful download from /tmp, see BagIT. Here we are doing error + * checking. 
+ */ + @Test + public void testDownloadTmpFile() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response tryToDownloadAsNonSuperuser = UtilIT.downloadTmpFile("/tmp/foo", apiToken); + tryToDownloadAsNonSuperuser.then().assertThat().statusCode(FORBIDDEN.getStatusCode()); + + Response toggleSuperuser = UtilIT.makeSuperUser(username); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response tryToDownloadEtcPasswd = UtilIT.downloadTmpFile("/etc/passwd", apiToken); + tryToDownloadEtcPasswd.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Path must begin with '/tmp' but after normalization was '/etc/passwd'.")); + } + private String createTestNonSuperuserApiToken() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index fae9cf95156..28f7fa28328 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -115,7 +115,7 @@ public void testBagItExport() throws IOException { //Bag-Size: 0 bytes //Payload-Oxum: 0.0 //Internal-Sender-Identifier: Root:Darwin's Finches - Response downloadBag = downloadLocalFile(pathToZip, apiToken); + Response downloadBag = UtilIT.downloadTmpFile(pathToZip, apiToken); downloadBag.then().assertThat().statusCode(OK.getStatusCode()); Path outputPath = Paths.get("/tmp/foo.zip"); java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); @@ -162,10 +162,4 @@ public static void tearDownClass() { } - static Response downloadLocalFile(String pathToFile, String apiToken) { - return given() - .header("X-Dataverse-key", apiToken) - .get("/api/admin/localfile?pathToFile=" + pathToFile); - } - -} +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..6abfb10c4f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3574,4 +3574,11 @@ static Response getDownloadSize(Integer datasetId, return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } + + static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); + } + } From 06f6222ba785fa37890efa4156ec3e7988fe4ff5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 20:29:47 -0500 Subject: [PATCH 187/546] more intermediate changes to the entity classes #8549 --- .../edu/harvard/iq/dataverse/DvObject.java | 28 +++++++++++++++++++ .../iq/dataverse/DvObjectContainer.java | 8 ++++-- .../dataverse/ingest/IngestServiceBean.java | 7 +++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..b86fabd0a07 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -2,6 +2,8 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -156,6 +158,9 @@ public String visit(DataFile df) { private boolean identifierRegistered; + @Column(nullable = true) + private Long storageSize; + private transient GlobalId globalId = null; @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) @@ -177,6 +182,13 @@ public void setAlternativePersistentIndentifiers(Set saveAndAddFilesToDataset(DatasetVersion version, + List newFiles, + DataFile fileToReplace, + boolean tabIngest) { + return saveAndAddFilesToDataset(version, newFiles, fileToReplace, tabIngest, null); + } public List saveAndAddFilesToDataset(DatasetVersion version, List newFiles, DataFile fileToReplace, From 8766932b6c086b1775e3faf8e19f411d83f87c07 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:12 -0500 Subject: [PATCH 188/546] extra logging --- .../iq/dataverse/search/SearchIncludeFragment.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 400f10cc375..c579eb14b7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -343,9 +343,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine("ATTENTION! query from user: " + query); - logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.fine("ATTENTION! sort by: " + sortField); + logger.info("ATTENTION! query from user: " + query); + logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -408,6 +409,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); + logger.info("second pass query: " + queryToPassToSolr); + logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From 552e7350cd7f9d9eb577b056e8d3eb414e8dc3cc Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:09:40 -0500 Subject: [PATCH 189/546] get quota command #8549 --- .../impl/GetCollectionQuotaCommand.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java new file mode 100644 index 00000000000..f07fde9508e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.io.IOException; +import java.util.List; +import java.util.logging.Logger; + +/** + * + * @author landreev + * The command doesn't do much. It's sole purpose is to check the permissions + * when it's called by the /api/dataverses/.../storage/quota api. 
+ */ +@RequiredPermissions(Permission.ManageDataversePermissions) +public class GetCollectionQuotaCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (dataverse != null && dataverse.getStorageQuota() != null) { + return dataverse.getStorageQuota().getAllocation(); + } + + return null; + } +} + + From e4aea93f0ada3212d1116b13cd0b2ae8105100e1 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:20:29 -0500 Subject: [PATCH 190/546] extra logging --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index c579eb14b7e..e5b5763efe6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused try { logger.info("ATTENTION! query from user: " + query); logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal); + logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.info("ATTENTION! sort by: " + sortField); /** From 2b8777990d008b31e61c4338f5b5e964e1f4a20d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:21:17 -0500 Subject: [PATCH 191/546] new classes and instances #8549 --- .../iq/dataverse/storageuse/StorageQuota.java | 118 ++++++++++++++++++ .../iq/dataverse/storageuse/StorageUse.java | 94 ++++++++++++++ .../storageuse/StorageUseServiceBean.java | 65 ++++++++++ 3 files changed, 277 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java new file mode 100644 index 00000000000..68ff6d95d00 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -0,0 +1,118 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import java.io.Serializable; +import java.util.logging.Logger; + +//import jakarta.persistence.*; + +/** + * + * @author landreev + * + */ +@Entity +public class StorageQuota implements Serializable { + private static final Logger logger = 
Logger.getLogger(StorageQuota.class.getCanonicalName()); + + /** + * Only Collection quotas are supported, for now + */ + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * For defining quotas for Users and/or Groups + * (Not supported as of yet) + + @Column(nullable = true) + private String assigneeIdentifier; + */ + + /** + * Could be changed to ManyToOne - if we wanted to be able to define separate + * quotas on the same collection for different users. (?) + * Whether we actually want to support the above is TBD. (possibly not) + * Only collection-wide quotas are supported for now. + */ + @OneToOne + @JoinColumn(name="definitionPoint_id", nullable=true) + private DvObject definitionPoint; + + @Column(nullable = true) + private Long allocation; + + public StorageQuota() {} + + /*public String getAssigneeIdentifier() { + return assigneeIdentifier; + } + + public void setAssigneeIdentifier(String assigneeIdentifier) { + this.assigneeIdentifier = assigneeIdentifier; + }*/ + + public DvObject getDefinitionPoint() { + return definitionPoint; + } + + public void setDefinitionPoint(DvObject definitionPoint) { + this.definitionPoint = definitionPoint; + } + + public Long getAllocation() { + return allocation; + } + + public void setAllocation(Long allocation) { + this.allocation = allocation; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageQuota)) { + return false; + } + StorageQuota other = (StorageQuota) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageQuota[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java new file mode 100644 index 00000000000..2633e3e026b --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -0,0 +1,94 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GenerationType; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import java.io.Serializable; + +/** + * + * @author landreev + */ +@NamedQueries({ + @NamedQuery(name = "StorageUse.findByteSizeByDvContainerId",query = "SELECT su.sizeInBytes FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.findByDvContainerId",query = "SELECT su FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId ") +}) +@Entity +public class StorageUse 
implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @OneToOne + @JoinColumn(nullable=false) + private DvObject dvObjectContainer; + + @Column + private Long sizeInBytes = null; + + public StorageUse(DvObjectContainer dvObjectContainer, Long sizeInBytes) { + this.dvObjectContainer = dvObjectContainer; + this.sizeInBytes = sizeInBytes; + } + + public Long getSizeInBytes() { + return sizeInBytes; + } + + public void setSizeInBytes(Long sizeInBytes) { + this.sizeInBytes = sizeInBytes; + } + + public void incrementSizeInBytes(Long sizeInBytes) { + this.sizeInBytes += sizeInBytes; + } + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageUse)) { + return false; + } + StorageUse other = (StorageUse) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageUse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java new file mode 100644 index 00000000000..fd04344c234 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -0,0 +1,65 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@Stateless +@Named +public class StorageUseServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(StorageUseServiceBean.class.getCanonicalName()); + @EJB + DataverseServiceBean dataverseService; + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public StorageUse findByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByDvContainerId", StorageUse.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public Long findStorageSizeByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByteSizeByDvContainerId", Long.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { + incrementStorageSize(dvObject, filesize); + DvObjectContainer parent = dvObject.getOwner(); + while (parent != null) { + incrementStorageSize(parent, filesize); + parent = parent.getOwner(); + } + } + + /** + * Should this be done in a new transaction? 
+ * @param dvObject + * @param filesize + */ + public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { + StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); + if (dvContainerSU != null) { + // @todo: named query + dvContainerSU.incrementSizeInBytes(filesize); + em.merge(dvContainerSU); + } else { + dvContainerSU = new StorageUse(dvObject, filesize); + em.persist(dvContainerSU); + } + } + +} From 235b1b018a50fd099c983516b046c6847be41e48 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:44:47 -0500 Subject: [PATCH 192/546] A fix for the missing subtree filter query in the 2nd pass search query. #9635 --- .../search/SearchIncludeFragment.java | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index e5b5763efe6..1acd4b0f8a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -282,7 +282,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; SolrQueryResponse solrQueryResponseSecondPass = null; - List filterQueriesFinal = new ArrayList<>(); + List filterQueriesExtended = new ArrayList<>(); if (dataverseAlias != null) { this.dataverse = dataverseService.findByAlias(dataverseAlias); @@ -296,7 +296,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * @todo centralize this into SearchServiceBean */ if (!isfilterQueryAlreadyInMap(filterDownToSubtree)){ - filterQueriesFinal.add(filterDownToSubtree); + filterQueriesExtended.add(filterDownToSubtree); } // this.dataverseSubtreeContext = dataversePath; } else { @@ -309,7 +309,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } - filterQueriesFinal.addAll(filterQueries); + filterQueriesExtended.addAll(filterQueries); /** * Add type queries, for the types (Dataverses, Datasets, Datafiles) @@ -323,7 +323,9 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused selectedTypesHumanReadable = combine(arr, " OR "); if (!selectedTypesHumanReadable.isEmpty()) { typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } + } + List filterQueriesFinal = new ArrayList<>(); + filterQueriesFinal.addAll(filterQueriesExtended); filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -343,10 +345,10 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.info("ATTENTION! query from user: " + query); - logger.info("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); - logger.info("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); - logger.info("ATTENTION! sort by: " + sortField); + logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); + logger.fine("ATTENTION! 
sort by: " + sortField); /** * @todo Number of search results per page should be configurable - @@ -399,7 +401,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused // run a second search to obtain the numbers of the unselected types: List filterQueriesFinalSecondPass = new ArrayList<>(); - filterQueriesFinalSecondPass.addAll(filterQueries); + filterQueriesFinalSecondPass.addAll(filterQueriesExtended); arr = new String[3 - selectedTypesList.size()]; int c = 0; @@ -409,8 +411,8 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ") + ")"); - logger.info("second pass query: " + queryToPassToSolr); - logger.info("second pass filter query: "+filterQueriesFinalSecondPass.toString()); + logger.fine("second pass query: " + queryToPassToSolr); + logger.fine("second pass filter query: "+filterQueriesFinalSecondPass.toString()); solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); From ceeeaecb9d222c2d2073713cdd839dac2ab4a304 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Sun, 19 Nov 2023 21:47:30 -0500 Subject: [PATCH 193/546] typo. #9635 --- .../edu/harvard/iq/dataverse/search/SearchIncludeFragment.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 1acd4b0f8a1..dd9cd78982a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -345,7 +345,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused setSolrErrorEncountered(false); try { - logger.fine"ATTENTION! query from user: " + query); + logger.fine("ATTENTION! query from user: " + query); logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.fine("ATTENTION! sort by: " + sortField); From 5ecfd49c7397f04003c745fc78074e1fb1a9b0aa Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:30:16 -0500 Subject: [PATCH 194/546] #9686 update metrics queries --- .../dataverse/metrics/MetricsServiceBean.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..6b540595e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? 
"and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,13 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + + System.out.print("datasets to month..."); + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +191,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -198,7 +200,6 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); - return (long) query.getSingleResult(); } @@ -212,6 +213,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id \n" + " where versionstate='RELEASED'\n" + " and dataset.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + @@ -225,6 +227,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + + " dvobject.id = dataset.id \n" + " AND dataset.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -253,7 +256,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "ORDER BY count(dataset.id) desc;" ); logger.log(Level.FINE, "Metric query: {0}", query); - + System.out.print("by sub to month: " + query); return query.getResultList(); } @@ -616,7 +619,7 @@ public String returnUnexpiredCacheDayBased(String metricName, String days, Strin public String returnUnexpiredCacheMonthly(String metricName, String yyyymm, String dataLocation, Dataverse d) { Metric queriedMetric = getMetric(metricName, dataLocation, yyyymm, d); - + System.out.print("returnUnexpiredCacheMonthly: " + queriedMetric); if (!doWeQueryAgainMonthly(queriedMetric)) { return queriedMetric.getValueJson(); } From f69c22982aeae57fdfb57607e06dfad628123b45 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:33:06 -0500 Subject: [PATCH 195/546] #9686 update metrics IT --- src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..fa05a23b675 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-11"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = 
"2023-11"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); From e4ede35ea8a57afc8830dc63619bed3b660da8ff Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:37:27 -0500 Subject: [PATCH 196/546] #9464 fix logger reference --- .../engine/command/impl/ValidateDatasetJsonCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java index ae1a89c3661..619740ddd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -21,7 +21,7 @@ @RequiredPermissions(Permission.AddDataset) public class ValidateDatasetJsonCommand extends AbstractCommand { - private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName()); private final Dataverse dataverse; private final String datasetJson; From d30ecfda14bd4adcafced8486d58507aba12c55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 25 Oct 2023 10:56:14 -0400 Subject: [PATCH 197/546] add S3 tests, LocalStack, MinIO #6783 Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. Note that the format of docker-compose-dev.yml had to change to allow for JVM options to be added. Finally, docs were improved for listing and setting stores via API. --- conf/localstack/buckets.sh | 3 + doc/release-notes/6783-s3-tests.md | 3 + .../source/admin/dataverses-datasets.rst | 4 + docker-compose-dev.yml | 78 +++++- pom.xml | 5 + .../harvard/iq/dataverse/api/S3AccessIT.java | 228 +++++++++++++++--- .../dataaccess/S3AccessIOLocalstackIT.java | 153 ++++++++++++ 7 files changed, 436 insertions(+), 38 deletions(-) create mode 100755 conf/localstack/buckets.sh create mode 100644 doc/release-notes/6783-s3-tests.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh new file mode 100755 index 00000000000..fe940d9890d --- /dev/null +++ b/conf/localstack/buckets.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack +awslocal s3 mb s3://mybucket diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md new file mode 100644 index 00000000000..1febb87aaed --- /dev/null +++ b/doc/release-notes/6783-s3-tests.md @@ -0,0 +1,3 @@ +Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests. + +In addition, a new integration test test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. 
It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 170807d3d67..37494c57fa1 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -53,11 +53,15 @@ Configure a Dataverse Collection to Store All New Files in a Specific File Store To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver + +(Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.) The current driver can be seen using:: curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver +(Note that for ``dataverse.files.store1.label=MyLabel``, ``store1`` will be returned.) + and can be reset to the default store with:: curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index bb0a4c95b12..769c24fb3a5 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -9,16 +9,38 @@ services: restart: on-failure user: payara environment: - - DATAVERSE_DB_HOST=postgres - - DATAVERSE_DB_PASSWORD=secret - - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} - - ENABLE_JDWP=1 - - DATAVERSE_FEATURE_API_BEARER_AUTH=1 - - DATAVERSE_AUTH_OIDC_ENABLED=1 - - DATAVERSE_AUTH_OIDC_CLIENT_ID=test - - DATAVERSE_AUTH_OIDC_CLIENT_SECRET=94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 - - DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL=http://keycloak.mydomain.com:8090/realms/test - - DATAVERSE_JSF_REFRESH_PERIOD=1 + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: ${DATAVERSE_DB_USER} + ENABLE_JDWP: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_JSF_REFRESH_PERIOD: "1" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + -Ddataverse.files.localstack1.type=s3 + -Ddataverse.files.localstack1.label=LocalStack + -Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack1.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack1.bucket-name=mybucket + -Ddataverse.files.localstack1.path-style-access=true + -Ddataverse.files.localstack1.upload-redirect=false + -Ddataverse.files.localstack1.access-key=default + -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.minio1.type=s3 + -Ddataverse.files.minio1.label=MinIO + -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 + -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 + -Ddataverse.files.minio1.bucket-name=mybucket + -Ddataverse.files.minio1.path-style-access=true + -Ddataverse.files.minio1.upload-redirect=false + 
-Ddataverse.files.minio1.access-key=minioadmin + -Ddataverse.files.minio1.secret-key=minioadmin ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -156,6 +178,42 @@ services: networks: - dataverse + dev_localstack: + container_name: "dev_localstack" + hostname: "localstack" + image: localstack/localstack:2.3.2 + restart: on-failure + ports: + - "127.0.0.1:4566:4566" + environment: + - DEBUG=${DEBUG-} + - DOCKER_HOST=unix:///var/run/docker.sock + - HOSTNAME_EXTERNAL=localstack + networks: + - dataverse + volumes: + - ./conf/localstack:/etc/localstack/init/ready.d + tmpfs: + - /localstack:mode=770,size=128M,uid=1000,gid=1000 + + dev_minio: + container_name: "dev_minio" + hostname: "minio" + image: minio/minio + restart: on-failure + ports: + - "9000:9000" + - "9001:9001" + networks: + - dataverse + volumes: + - minio_storage:/data + environment: + # these are the defaults but are here for clarity + MINIO_ROOT_USER: minioadmin + MINIO_ROOT_PASSWORD: minioadmin + command: server /data + networks: dataverse: driver: bridge diff --git a/pom.xml b/pom.xml index 4d10073334f..34b0ad2e835 100644 --- a/pom.xml +++ b/pom.xml @@ -612,6 +612,11 @@ 3.0.0 test + + org.testcontainers + localstack + test + From 4ad06ba1af38cf84f5b639a605eecaf95a4fe8b1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:54:20 -0500 Subject: [PATCH 280/546] rename previewshavefailed to previewimagefail #9506 This matches previewimageavailable, also in dvobject. Plus it's clear we aren't talking about shaving. :) --- .../edu/harvard/iq/dataverse/DataFileServiceBean.java | 2 +- .../iq/dataverse/DatasetVersionServiceBean.java | 4 ++-- src/main/java/edu/harvard/iq/dataverse/DvObject.java | 10 +++++----- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- .../iq/dataverse/dataaccess/ImageThumbConverter.java | 4 ++-- .../migration/V6.0.0.5__9506-track-thumb-failures.sql | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index fae95f12a0c..446c66e5a8b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -987,7 +987,7 @@ public boolean isThumbnailAvailable (DataFile file) { this.save(file); return true; } - file.setPreviewsHaveFailed(true); + file.setPreviewImageFail(true); this.save(file); return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index d209f7d9e26..1ee517c9831 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -825,7 +825,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype LIKE 'image/%' " @@ -859,7 +859,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - + "AND o.previewshavefailed = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + "AND df.contenttype = 
'application/pdf' " diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 12f0b63b3a1..c6d4a73bfd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -191,14 +191,14 @@ public void setPreviewImageAvailable(boolean status) { * real failure in generating the thumbnail. In both cases, we won't want to try * again every time the preview/thumbnail is requested for a view. */ - private boolean previewsHaveFailed; + private boolean previewImageFail; - public boolean isPreviewsHaveFailed() { - return previewsHaveFailed; + public boolean isPreviewImageFail() { + return previewImageFail; } - public void setPreviewsHaveFailed(boolean previewsHaveFailed) { - this.previewsHaveFailed = previewsHaveFailed; + public void setPreviewImageFail(boolean previewImageFail) { + this.previewImageFail = previewImageFail; } public Timestamp getModificationTime() { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 2c2f49a0444..b1d31f8d44b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2429,7 +2429,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @DELETE @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { - em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE").executeUpdate(); + em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); return ok("Thumnail Failure Flags cleared."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index febf659b71a..2de37174a3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -119,9 +119,9 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s } private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { - logger.log(Level.FINE, (file.isPreviewsHaveFailed() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); + logger.log(Level.FINE, (file.isPreviewImageFail() ? 
"Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); // Don't try to generate if there have been failures: - if (!file.isPreviewsHaveFailed()) { + if (!file.isPreviewImageFail()) { boolean thumbnailGenerated = false; if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { thumbnailGenerated = generateImageThumbnail(storageIO, size); diff --git a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql index 9b12d27db91..156960d2011 100644 --- a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql +++ b/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql @@ -1 +1 @@ -ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewshavefailed BOOLEAN DEFAULT FALSE; \ No newline at end of file +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewimagefail BOOLEAN DEFAULT FALSE; From 7148158dec36576c33c1cbc96143128769dd938a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:56:43 -0500 Subject: [PATCH 281/546] add tests #9506 --- .../java/edu/harvard/iq/dataverse/api/AdminIT.java | 10 ++++++++++ .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..c29c8619d8c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -818,6 +818,16 @@ public void testLoadMetadataBlock_ErrorHandling() { message ); } + @Test + public void testClearThumbnailFailureFlag(){ + Response nonExistentFile = UtilIT.clearThumbnailFailureFlag(Long.MAX_VALUE); + nonExistentFile.prettyPrint(); + nonExistentFile.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + Response clearAllFlags = UtilIT.clearThumbnailFailureFlags(); + clearAllFlags.prettyPrint(); + clearAllFlags.then().assertThat().statusCode(OK.getStatusCode()); + } @Test public void testBannerMessages(){ diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..58edbae18e0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -223,7 +223,19 @@ public static Response validateDataFileHashValue(String fileId, String apiToken .post("/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken); return response; } - + + public static Response clearThumbnailFailureFlags() { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag"); + return response; + } + + public static Response clearThumbnailFailureFlag(long fileId) { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag/" + fileId); + return response; + } + private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) { JsonObjectBuilder builder = Json.createObjectBuilder(); builder.add("authenticationProviderId", authenticationProviderId); From 67502ca2326b0536077ad96eb0fe497ca70f37f6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:58:18 -0500 Subject: [PATCH 282/546] fix typos #9506 --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index b1d31f8d44b..1445db81e4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2430,7 +2430,7 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur @Path("/clearThumbnailFailureFlag") public Response clearThumbnailFailureFlag() { em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); - return ok("Thumnail Failure Flags cleared."); + return ok("Thumbnail Failure Flags cleared."); } @DELETE @@ -2441,7 +2441,7 @@ public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String file Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); - return ok("Thumnail Failure Flag cleared for file id=: " + df.getId() + "."); + return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); } catch (WrappedResponse r) { logger.info("Could not find file with the id: " + fileId); return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); From 82f0bc0eef833388b3e20bf48fe8bb46163640ee Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 30 Nov 2023 15:59:05 -0500 Subject: [PATCH 283/546] one more rename to previewimagefail #9506 This should have been part of 4ad06ba1a. --- src/main/java/edu/harvard/iq/dataverse/api/Admin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index 1445db81e4c..4cb0521d218 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -2438,7 +2438,7 @@ public Response clearThumbnailFailureFlag() { public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { try { DataFile df = findDataFileOrDie(fileId); - Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewshavefailed = FALSE where id = ?"); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?"); deleteQuery.setParameter(1, df.getId()); deleteQuery.executeUpdate(); return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); From de2f9a4f6beaad2e34249616dd39748c29e15701 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:37:35 -0500 Subject: [PATCH 284/546] popup separate tab for single file download transfer --- .../iq/dataverse/FileDownloadServiceBean.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 7a03f1a35dc..ca3f5b4bded 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -20,6 +20,8 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -310,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri } } - private void 
redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { - String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - logger.fine("Redirecting to file download url: " + fileDownloadUrl); - try { - FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); - } catch (IOException ex) { - logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, + Long fileMetadataId) { + String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, + fileMetadataId); + if (downloadType.equals("GlobusTransfer")) { + PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); + } else { + logger.fine("Redirecting to file download url: " + fileDownloadUrl); + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + } } } From c82064ace53bcbf5e8b04a24f916fa333f863c9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 30 Nov 2023 16:38:17 -0500 Subject: [PATCH 285/546] fix old label in popup required case --- src/main/webapp/file-download-button-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 8ef2af40431..318aab1454e 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -80,7 +80,7 @@ - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} From 2644faee02f7001e51d19e474e3ca5b1b1264302 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 18:03:29 -0500 Subject: [PATCH 286/546] Rearranges the code that updates the Storage Use records to reflect the size of the saved content. #8549 --- .../dataverse/ingest/IngestServiceBean.java | 120 +++++++++++------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9b3ddd228e9..5efb4c06f48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -177,7 +177,7 @@ public class IngestServiceBean { // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. - // There is way too much going on in this method. :( + // !! There is way too much going on in this method. :( !! // @todo: Is this method a good candidate for turning into a dedicated Command? public List saveAndAddFilesToDataset(DatasetVersion version, @@ -195,6 +195,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // renamed FOOBAR-1.txt back to FOOBAR.txt... 
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles, fileToReplace); Dataset dataset = version.getDataset(); + long totalBytesSaved = 0L; if (systemConfig.isStorageQuotasEnforced()) { // Check if this dataset is subject to any storage quotas: @@ -205,6 +206,9 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { + // is it ever "unattached"? + // do we ever call this method with dataFile.getOwner() != null? + // - we really shouldn't be, either. unattached = true; dataFile.setOwner(dataset); } @@ -230,31 +234,38 @@ public List saveAndAddFilesToDataset(DatasetVersion version, dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: + /** + * This commented-out code demonstrates how to copy + * bytes from a local InputStream (or a readChannel) + * into the writable byte channel of a Dataverse + * DataAccessIO object: */ - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } + /** + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); + * readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long + * start = 0; + * while ( start < readChannel.size() ) { + * readChannel.transferTo(start, bytesPerIteration, writeChannel); start += bytesPerIteration; + * } */ - /* - * But it's easier to use this convenience method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); + /** + * But it's easier to use this convenience method from + * the DataAccessIO: + * + * (if the underlying storage method for this file is + * local filesystem, the DataAccessIO will simply copy + * the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, + * storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); */ dataAccess.savePath(tempLocationPath); @@ -265,7 +276,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, savedSuccess = true; logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - // TODO: reformat this file to remove the many tabs added in cc08330 + // TODO: reformat this file to remove the many tabs added in cc08330 - done, I think? extractMetadataNcml(dataFile, tempLocationPath); } catch (IOException ioex) { @@ -375,6 +386,15 @@ public List saveAndAddFilesToDataset(DatasetVersion version, if (savedSuccess) { if (uploadSessionQuota != null) { + // It may be worth considering refreshing the quota here, + // and incrementing the Storage Use record for + // all the parent objects in real time, as + // *each* individual file is being saved. 
I experimented + // with that, but decided against it for performance + // reasons. But yes, there may be some edge case where + // parallel multi-file uploads can end up being able + // to save 2X worth the quota that was available at the + // beginning of each session. if (confirmedFileSize > uploadSessionQuota.getRemainingQuotaInBytes()) { savedSuccess = false; logger.warning("file size over quota limit, skipping"); @@ -382,7 +402,6 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // this (potentially partial) failure to the user. //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(confirmedFileSize), bytesToHumanReadable(storageQuotaLimit))); } else { - // Adjust quota: logger.info("Setting total usage in bytes to " + (uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize)); uploadSessionQuota.setTotalUsageInBytes(uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize); @@ -390,19 +409,12 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } // ... unless we had to reject the file just now because of - // the quota limits, increment the storage use record(s): + // the quota limits, count the number of bytes saved for the + // purposes of incrementing the total storage of the parent + // DvObjectContainers: if (savedSuccess) { - // Update storage use for all the parent dvobjects: - // @todo: Do we want to do this after after *each* file is saved? - there may be - // quite a few files being saved here all at once. We could alternatively - // perform this update only once, after this loop is completed (are there any - // risks/accuracy loss?) - // This update is performed with a direct native query that - // is supposed to be quite fast. But still. - logger.info("Incrementing recorded storage use by " + confirmedFileSize + " bytes for dataset " + dataset.getId()); - // (@todo: need to consider what happens when this code is called on Create?) - storageUseService.incrementStorageSizeRecursively(dataset.getId(), confirmedFileSize); + totalBytesSaved += confirmedFileSize; } } @@ -425,12 +437,14 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean metadataExtracted = false; boolean metadataExtractedFromNetcdf = false; if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. + /** + * Note that we don't try to ingest the file right away + * - instead we mark it as "scheduled for ingest", then + * at the end of the save process it will be queued for + * async. ingest in the background. In the meantime, the + * file will be ingested as a regular, non-tabular file, + * and appear as such to the user, until the ingest job + * is finished with the Ingest Service. */ dataFile.SetIngestScheduled(); } else if (fileMetadataExtractable(dataFile)) { @@ -488,6 +502,10 @@ public List saveAndAddFilesToDataset(DatasetVersion version, // dataset.getGlobalId()); // Make sure the file is attached to the dataset and to the version, if this // hasn't been done yet: + // @todo: but shouldn't we be doing the reverse if we haven't been + // able to save the file? 
- disconnect it from the dataset and + // the version?? - L.A. 2023 + // (that said, is there *ever* a case where dataFile.getOwner() != null ?) if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); @@ -503,8 +521,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, DataFileCategory dataFileCategory = dfcIt.next(); if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); + DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); if (newCategory != null) { newCategory.addFileMetadata(dataFile.getFileMetadata()); // dataFileCategory = newCategory; @@ -516,10 +533,25 @@ public List saveAndAddFilesToDataset(DatasetVersion version, } } } + + // Hmm. Noticing that the following two things - adding the + // files to the return list were being + // done outside of this "if (savedSuccess)" block. I'm pretty + // sure that was wrong. - L.A. 11-30-2023 + ret.add(dataFile); + // (unless that is that return value isn't used for anything - ?) } - ret.add(dataFile); } + // Update storage use for all the parent dvobjects: + logger.info("Incrementing recorded storage use by " + totalBytesSaved + " bytes for dataset " + dataset.getId()); + // Q. Need to consider what happens when this code is called on Create? + // A. It works on create as well, yes. (the recursive increment + // query in the method below does need the parent dataset to + // have the database id. But even if these files have been + // uploaded on the Create form, we first save the dataset, and + // then add the files to it. - L.A. + storageUseService.incrementStorageSizeRecursively(dataset.getId(), totalBytesSaved); } return ret; From dc567848bdfcc9647d0779c01bb57f93ab593d89 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:10:43 -0500 Subject: [PATCH 287/546] making the set/delete quota commands superuser-only (doh). 
#8549 --- .../impl/DeleteCollectionQuotaCommand.java | 13 ++++++++++++- .../command/impl/SetCollectionQuotaCommand.java | 16 +++++++++++++--- src/main/java/propertyFiles/Bundle.properties | 1 + .../edu/harvard/iq/dataverse/api/FilesIT.java | 3 +++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 5fcbad929a9..bdeb9c6e8cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -6,20 +6,25 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class DeleteCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName()); @@ -33,6 +38,12 @@ public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // first check if user is a superuser + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, targetDataverse); + } + if (targetDataverse == null) { throw new IllegalCommandException("", this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index a134cbefdb9..6b0d1bf313a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -13,14 +14,18 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import 
edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; /** * * @author landreev + * + * A superuser-only command: */ -@RequiredPermissions(Permission.ManageDataversePermissions) +@RequiredPermissions({}) public class SetCollectionQuotaCommand extends AbstractVoidCommand { private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); @@ -36,13 +41,18 @@ public SetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target, Lo @Override public void executeImpl(CommandContext ctxt) throws CommandException { + // Check if user is a superuser: + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, dataverse); + } if (dataverse == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid collection", this); } if (allocation == null) { - throw new IllegalCommandException("", this); + throw new IllegalCommandException("Must specify valid allocation in bytes", this); } StorageQuota storageQuota = dataverse.getStorageQuota(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 748b674a4e1..5033426175c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -925,6 +925,7 @@ dataverse.storage.quota.allocation=Total quota allocation for this collection: { dataverse.storage.quota.notdefined=No quota defined for this collection dataverse.storage.quota.updated=Storage quota successfully set for the collection dataverse.storage.quota.deleted=Storage quota successfully disabled for the collection +dataverse.storage.quota.superusersonly=Only superusers can change storage quotas. dataverse.storage.use=Total recorded size of the files stored in this collection (user-uploaded files plus the versions in the archival tab-delimited format when applicable): {0} bytes dataverse.datasize.ioerror=Fatal IO error while trying to determine the total size of the files stored in the dataverse. Please report this error to the Dataverse administrator. 
dataverse.inherited=(inherited from enclosing Dataverse) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index e391e17d8d5..915f82a6de2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -2375,6 +2375,9 @@ public void testCollectionStorageQuotas() { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); From f4eee659021dfaab4dfa9c13e761b7c1875281c5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 30 Nov 2023 19:18:15 -0500 Subject: [PATCH 288/546] removing the license template stubs (#8549) --- .../engine/command/impl/DeleteCollectionQuotaCommand.java | 5 ----- .../engine/command/impl/GetCollectionStorageUseCommand.java | 4 ---- .../engine/command/impl/SetCollectionQuotaCommand.java | 6 ------ .../edu/harvard/iq/dataverse/storageuse/StorageQuota.java | 4 ---- .../edu/harvard/iq/dataverse/storageuse/StorageUse.java | 4 ---- .../iq/dataverse/storageuse/StorageUseServiceBean.java | 4 ---- .../iq/dataverse/storageuse/UploadSessionQuotaLimit.java | 4 ---- 7 files changed, 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index bdeb9c6e8cb..4015228366b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -1,11 +1,6 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java index 40b3128b80d..c30a5a34a81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index 6b0d1bf313a..cf8fb6fd42e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -1,13 +1,7 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java index 0cfebe4167a..d00f7041e61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 11a2a8b706c..240fba1037d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObject; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index e92ba43e950..b542a7cd661 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java index 06bbe986f70..f7dac52e886 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click 
nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.storageuse; /** From 538921061604e4daacd864f8ec3865d6d0642561 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:21:35 +0000 Subject: [PATCH 289/546] Stash: working on new canDownloadAtLeastOneFile Datasets API endpoint --- .../iq/dataverse/PermissionServiceBean.java | 8 ++++++ .../harvard/iq/dataverse/api/Datasets.java | 14 +++++++++++ .../harvard/iq/dataverse/api/DatasetsIT.java | 25 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 6 +++++ 4 files changed, 53 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index a1de33a764e..9e6628617ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -837,4 +837,12 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } + public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + return true; + } + } + return false; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..a9cfefc33d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4134,4 +4134,18 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft)); return ok(jsonObjectBuilder); } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile") + public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + }, getRequestUser(crc)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d20f1e8a58b..945b741a94b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4121,4 +4121,29 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testGetCanDownloadAtLeastOneFile() { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String 
dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + // Call with valid dataset id + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + + // Call with invalid dataset id + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..bf43733788a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3442,6 +3442,12 @@ static Response getUserPermissionsOnDataset(String datasetId, String apiToken) { .get("/api/datasets/" + datasetId + "/userPermissions"); } + static Response getCanDownloadAtLeastOneFile(String datasetId, String versionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/datasets/" + datasetId + "/versions/" + versionId + "/canDownloadAtLeastOneFile"); + } + static Response createFileEmbargo(Integer datasetId, Integer fileId, String dateAvailable, String apiToken) { JsonObjectBuilder jsonBuilder = Json.createObjectBuilder(); jsonBuilder.add("dateAvailable", dateAvailable); From 8ec0984a663e4daa5b60049c1ee8d51004ca452c Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 1 Dec 2023 09:26:39 -0500 Subject: [PATCH 290/546] add page on Jenkins #10101 --- doc/sphinx-guides/source/qa/index.md | 1 + doc/sphinx-guides/source/qa/jenkins.md | 44 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 doc/sphinx-guides/source/qa/jenkins.md diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 08deb7ee27d..6027f07574f 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -7,4 +7,5 @@ performance-tests.md manual-testing.md test-automation.md other-approaches.md +jenkins.md ``` diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md new file mode 100644 index 00000000000..dbfec0d60d0 --- /dev/null +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -0,0 +1,44 @@ +# Jenkins + +```{contents} Contents: +:local: +:depth: 3 +``` + +## Introduction + +Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) + +You can find our Jenkins installation at . + +Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) 
+ +## Jobs + +Jenkins is organized into jobs. We'll highlight a few. + +### IQSS-dataverse-develop + +, which we will refer to as the "develop" job, runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! + +You can get to this job from the README at . + +### IQSS-Dataverse-Develop-PR + + can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and its particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). + +### guides.dataverse.org + + is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. + +## Checking if API Tests are Passing + +If API tests are failing, you should not merge the pull request. + +How can you know if API tests are passing? Here are the steps, by way of example. + +- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). +- You are now on the new "blue" interface for Jenkins. Click the button in the header called "go to classic" which should take you to (for example) . +- Click "Test Result". +- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. +- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error.
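If you want a quick, scriptable check on whether the develop job is green (rather than clicking through the UI), Jenkins exposes a read-only JSON API on every job page. The sketch below is illustrative only: it assumes your IP address is allowed through the firewall mentioned in the Introduction, and `JENKINS_URL` is a placeholder for the address of our Jenkins installation.

```bash
# Minimal sketch: ask Jenkins for the result of the last completed "develop" build.
# JENKINS_URL is a placeholder; some instances also require -u user:apitoken.
JENKINS_URL=https://jenkins.example.edu
curl -s "$JENKINS_URL/job/IQSS-dataverse-develop/lastCompletedBuild/api/json?tree=result"
# Expected output is a small JSON document such as {"result":"SUCCESS"};
# "FAILURE" or "UNSTABLE" means the job needs attention - make noise about it!
```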
From f48f3a84a72b212d66a4bae1c1056e31dc8f7e52 Mon Sep 17 00:00:00 2001 From: GPortas Date: Fri, 1 Dec 2023 14:50:40 +0000 Subject: [PATCH 291/546] Fixed: DatasetVersionFilesServiceBean order by condition for type criteria --- .../DatasetVersionFilesServiceBean.java | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 78fd896c897..99c3c65e3b8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -260,22 +260,27 @@ private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion, return criteriaBuilder.and(predicates.toArray(new Predicate[]{})); } - private Order createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, - FileOrderCriteria orderCriteria, - Root fileMetadataRoot) { + private List createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, + FileOrderCriteria orderCriteria, + Root fileMetadataRoot) { Path label = fileMetadataRoot.get("label"); Path dataFile = fileMetadataRoot.get("dataFile"); Path publicationDate = dataFile.get("publicationDate"); Path createDate = dataFile.get("createDate"); Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate); - return switch (orderCriteria) { - case NameZA -> criteriaBuilder.desc(label); - case Newest -> criteriaBuilder.desc(orderByLifetimeExpression); - case Oldest -> criteriaBuilder.asc(orderByLifetimeExpression); - case Size -> criteriaBuilder.asc(dataFile.get("filesize")); - case Type -> criteriaBuilder.asc(dataFile.get("contentType")); - default -> criteriaBuilder.asc(label); - }; + List orderList = new ArrayList<>(); + switch (orderCriteria) { + case NameZA -> orderList.add(criteriaBuilder.desc(label)); + case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression)); + case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression)); + case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize"))); + case Type -> { + orderList.add(criteriaBuilder.asc(dataFile.get("contentType"))); + orderList.add(criteriaBuilder.asc(label)); + } + default -> orderList.add(criteriaBuilder.asc(label)); + } + return orderList; } private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { From a142ac82e7315370755f11245c38f388f7580b12 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 1 Dec 2023 12:51:55 -0500 Subject: [PATCH 292/546] Adds description about the "go to classic" button --- doc/sphinx-guides/source/qa/jenkins.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md index dbfec0d60d0..a4ca4d8688f 100644 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -38,7 +38,7 @@ If API tests are failing, you should not merge the pull request. How can you know if API tests are passing? Here are the steps, by way of example. - From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -- You are now on the new "blue" interface for Jenkins. 
Click the button in the header called "go to classic" which should take you to (for example) . +- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . - Click "Test Result". - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. From a29942bf4c8c78d7dee34d61fbb73f44b8ec699e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:08:26 -0500 Subject: [PATCH 293/546] add files not accessible by dataverse flag --- .../dataaccess/AbstractRemoteOverlayAccessIO.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 9de6bf69832..16defc26a4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -48,6 +48,11 @@ public abstract class AbstractRemoteOverlayAccessIO extends static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; protected static final String REMOTE_STORE_NAME = "remote-store-name"; protected static final String REMOTE_STORE_URL = "remote-store-url"; + + // Whether Dataverse can access the file bytes + //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; + protected StorageIO baseStore = null; protected String path = null; protected PoolingHttpClientConnectionManager cm = null; @@ -329,6 +334,10 @@ protected String getStoragePath() throws IOException { logger.fine("fullStoragePath: " + fullStoragePath); return fullStoragePath; } + + public static boolean isNotDataverseAccessible(String storeId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(storeId, FILES_NOT_ACCESSIBLE_BY_DATAVERSE)); + } From 0d758398b64521e65c0d0d90d963aeb7b01af42d Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:09:03 -0500 Subject: [PATCH 294/546] add Globus store to the normal file upload (as for the remote store) --- .../java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index 4a4d3f57f83..a1bcbe49327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -263,7 +263,8 @@ public static StorageIO createNewStorageIO(T dvObject, S storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; case REMOTE: - storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + case GLOBUS: + storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; break; default: logger.warning("Could not find storage driver for: " + storageTag); From 
ce8bb6e97ff776777b642ceafb3c1fb7bae6129f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:10:28 -0500 Subject: [PATCH 295/546] add Globus as a download option in file table header requires changes to startGlobusTransfer in separate commit --- src/main/webapp/dataset.xhtml | 2 +- src/main/webapp/filesFragment.xhtml | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 2f76197e508..0b8983a7770 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -230,7 +230,7 @@
  • - +
  • diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index fbc48a0e884..3d28e3170f7 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -436,7 +436,7 @@
    + and !(DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}"> #{bundle.download}
    -
    + and (DatasetPage.isVersionHasTabular()||DatasetPage.isVersionHasGlobus())}">
    From 8e75a3e2f501b3f0e09fbc9cba9041c52f769737 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:11:56 -0500 Subject: [PATCH 296/546] Add logic for Globus transfer of some files --- .../edu/harvard/iq/dataverse/DatasetPage.java | 112 +++++++++++++----- 1 file changed, 81 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index a663b8588ad..0b0d0a2e4f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -11,6 +11,9 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -361,6 +364,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * other boolean. */ private boolean versionHasTabular = false; + private boolean versionHasGlobus = false; private boolean showIngestSuccess; @@ -2183,10 +2187,19 @@ private String init(boolean initFull) { // the total "originals" size of the dataset with direct custom queries; // then we'll be able to drop the lookup hint for DataTable from the // findDeep() method for the version and further speed up the lookup - // a little bit. + // a little bit. + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (fmd.getDataFile().isTabularData()) { + DataFile df = fmd.getDataFile(); + if (df.isTabularData()) { versionHasTabular = true; + } + if(globusDownloadEnabled) { + if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + versionHasGlobus= true; + } + } + if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) { break; } } @@ -2483,6 +2496,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p public boolean isVersionHasTabular() { return versionHasTabular; } + + public boolean isVersionHasGlobus() { + return versionHasGlobus; + } public boolean isReadOnly() { return readOnly; @@ -3089,6 +3106,16 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } + private List selectedGlobusTransferableFiles; + + public List getSelectedGlobusTransferableFiles() { + return selectedGlobusTransferableFiles; + } + + public void setSelectedGlobusTransferableFiles(List selectedGlobusTransferableFiles) { + this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3247,8 +3274,8 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } } - //if there are two or more files with a total size - //over the zip limit post a "too large" popup + //if there are two or more files, with a total size + //over the zip limit, post a "too large" popup if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { 
setValidateFilesOutcome("FailSize"); return false; @@ -3257,16 +3284,17 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ // If some of the files were restricted and we had to drop them off the // list, and NONE of the files are left on the downloadable list // - we show them a "you're out of luck" popup: - if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("FailRestricted"); return false; } - if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && !getSelectedNonDownloadableFiles().isEmpty()) { setValidateFilesOutcome("Mixed"); return true; } - + //ToDo - should Mixed not trigger this? if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) { setValidateFilesOutcome("GuestbookRequired"); } @@ -3302,12 +3330,25 @@ private boolean filterSelectedFiles(){ setSelectedNonDownloadableFiles(new ArrayList<>()); setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); + setSelectedGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ - if(this.fileDownloadHelper.canDownloadFile(fmd)){ + boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); + + boolean globusTransferable = false; + if(globusDownloadEnabled) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; + } else if(globusTransferable) { + getSelectedGlobusTransferableFiles().add(fmd); + someFiles=true; } else { getSelectedNonDownloadableFiles().add(fmd); } @@ -5247,7 +5288,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } - +/* These appear to be unused - toDo - delete private Boolean downloadButtonAllEnabled = null; public boolean isDownloadAllButtonEnabled() { @@ -5276,7 +5317,7 @@ public boolean isDownloadSelectedButtonEnabled(){ } return false; } - +*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; @@ -6277,28 +6318,37 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer() { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if(fileMetadataForAction!=null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - 
PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if(getSelectedDownloadableFiles()!=null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, getSelectedDownloadableFiles())); + public void startGlobusTransfer(boolean transferAll) { + if(transferAll) { + this.setSelectedFiles(workingVersion.getFileMetadatas()); + } + boolean validated = validateFilesForDownload(true); + if (validated) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - //ToDo: For non-public, need the subset that are downloadable by the user - //ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } else { + // ToDo: For non-public, need the subset that are downloadable by the user + // ToDo: For mixed (some in backing store), need the ones in the globus store + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + workingVersion.getFileMetadatas())); + } } } } From 0e91e6ae59020991513add7e14e09c69641ee71e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:12:20 -0500 Subject: [PATCH 297/546] Convenience method to get store id for a file --- src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index df0c3e5a019..776d04e98cc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1797,5 +1797,11 @@ public static boolean isActivelyEmbargoed(List fmdList) { } return false; } + + + public static String getStorageDriver(DataFile dataFile) { + String storageIdentifier = dataFile.getStorageIdentifier(); + return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR)); + } } From e5bf3001e39bf8362f9025e85cf3f6626baf15d0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:14:41 -0500 Subject: [PATCH 298/546] skip inaccessible files when doing validatation --- .../command/impl/FinalizeDatasetPublicationCommand.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 3da087addd9..89cfc732455 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -32,15 +32,13 @@ import java.util.logging.Logger; import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; import java.util.ArrayList; import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; - /** * @@ -350,7 +348,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm // (the decision was made to validate all the files on every // major release; we can revisit the decision if there's any // indication that this makes publishing take significantly longer. - if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { + String driverId = FileUtil.getStorageDriver(dataFile); + if(StorageIO.isDataverseAccessible(driverId) && maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { FileUtil.validateDataFileChecksum(dataFile); } else { From 534c99bb0376aeaa25f2d9d54cbe68a8bfb3b6bc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:15:23 -0500 Subject: [PATCH 299/546] Convenience method re: store supports globus access --- .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index aad1dab5eab..d827e40e807 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -58,4 +58,11 @@ public static String getGlobusToken(String storeId) { return StorageIO.getConfigParamForDriver(storeId, GLOBUS_TOKEN); } + public static boolean isGlobusAccessible(String storeId) { + if(StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) { + return true; + } + return false; + } + } From ca1a4f1267b2d52cd38054cca61fbddf6941522b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:16:12 -0500 Subject: [PATCH 300/546] Update to use new isNotDataverseAccessible method in getInputStream --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIO.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 7ec1e2f9e73..3e72fa85d35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -232,7 +232,9 @@ public long retrieveSizeFromMedia() { @Override public InputStream getInputStream() throws IOException { - if(Boolean.parseBoolean(getConfigParam("endpoint-maps-to-base-store"))) { + //Currently only supported when using an S3 store with the Globus S3Connector. 
+ //ToDo: Support when using a managed Globus endpoint that supports http access + if(!AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(endpoint)) { return baseStore.getInputStream(); } else { throw new IOException("Not implemented"); From f39fa0715e81aafefd14c92c50171eb436a45491 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:17:03 -0500 Subject: [PATCH 301/546] Convenience method isDataverseAccessible --- .../edu/harvard/iq/dataverse/dataaccess/StorageIO.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index 14fc9254c59..51cdecf64a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -57,6 +57,8 @@ public abstract class StorageIO { static final String UPLOAD_REDIRECT = "upload-redirect"; static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band"; protected static final String DOWNLOAD_REDIRECT = "download-redirect"; + protected static final String DATAVERSE_INACCESSIBLE = "dataverse-inaccessible"; + public StorageIO() { @@ -620,6 +622,11 @@ public static boolean isDirectUploadEnabled(String driverId) { || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND)); } + //True by default, Stores (e.g. RemoteOverlay, Globus) can set this false to stop attempts to read bytes + public static boolean isDataverseAccessible(String driverId) { + return (true && !Boolean.parseBoolean(getConfigParamForDriver(driverId, DATAVERSE_INACCESSIBLE))); + } + // Check that storageIdentifier is consistent with store's config // False will prevent direct uploads static boolean isValidIdentifier(String driverId, String storageId) { From dc4580232dcfe698010cdc4c20fb77c19482484b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 14:18:05 -0500 Subject: [PATCH 302/546] use correct term (though up and down terms are the same) could also fix for native/http, but not for rsync --- src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index e40f55fedd8..3c6992f8ec3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -941,7 +941,7 @@ public boolean isHTTPDownload() { } public boolean isGlobusDownload() { - return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); + return getMethodAvailable(FileDownloadMethods.GLOBUS.toString(), false); } public boolean isGlobusFileDownload() { From 0bfbb10c355ea1ebc24d2d8bee928c50ca22db41 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 1 Dec 2023 16:59:38 -0500 Subject: [PATCH 303/546] "manage collections" guide entry. 
#8549 --- .../source/admin/collectionquotas.rst | 17 +++++++++++++++++ doc/sphinx-guides/source/admin/index.rst | 1 + doc/sphinx-guides/source/api/native-api.rst | 12 +++++++++++- .../iq/dataverse/storageuse/StorageUse.java | 3 +++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 doc/sphinx-guides/source/admin/collectionquotas.rst diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst new file mode 100644 index 00000000000..883b6cf0c93 --- /dev/null +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -0,0 +1,17 @@ +Storage Quotas for Collections +============================== + +Please note that this is a new and still experimental feature (as of the Dataverse v6.1 release). + +Instance admins can now define storage quota limits for specific collections. These limits can be set, changed and/or deleted via the provided APIs (please see the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide). The Read version of the API is available to the individual collection admins (i.e., a collection owner can check on the quota configured for their collection), but only superusers can set, change or disable storage quotas. + +Storage quotas are *inherited* by subcollections. In other words, when a storage use limit is set for a specific collection, it applies to all the datasets immediately under it and in its sub-collections, unless different quotas are defined there, and so on. Each file added to any dataset in that hierarchy counts for the purposes of the quota limit defined for the top collection. A storage quota defined on a child sub-collection overrides whatever quota may be defined on the parent or inherited from an ancestor. + +For example, a collection ``A`` has the storage quota set to 10GB. It has 3 sub-collections, ``B``, ``C`` and ``D``. Users can keep uploading files into the datasets anywhere in this hierarchy until the combined size of 10GB is reached between them. However, if an admin has reason to limit one of the sub-collections, ``B``, to 3GB only, that quota can be explicitly set there. This both limits the growth of ``B`` to 3GB and *guarantees* that allocation to it. That is, the contributors to collection ``B`` will be able to keep adding data until the 3GB limit is reached, even after the parent collection ``A`` reaches the combined 10GB limit (at which point ``A`` and all its subcollections except for ``B`` will become read-only). + +We do not yet know whether this is going to be a popular or needed use case - a child collection quota that is different from the quota it would inherit from a parent. It is likely that for many instances it will be sufficient to be able to define quotas for collections and have them apply to all the child objects underneath. We will examine the response to this feature and consider making adjustments to this scheme based on it. We are already considering introducing other types of quotas, such as limits by users or specific storage volumes. + +Please note that only the sizes of the main datafiles and the archival tab-delimited format versions, as produced by the ingest process, are counted for the purposes of enforcing the limits. Automatically generated "auxiliary" files, such as rescaled image thumbnails and metadata exports for datasets, are not. + +When quotas are set and enforced, users will be informed of the remaining storage allocation on the file upload page, together with other upload and processing limits.
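As a rough illustration of the ``A``/``B`` example above, a superuser could configure and inspect such quotas with the API calls documented in the :ref:`collection-storage-quotas` section of the :doc:`/api/native-api` guide. This is only a sketch: the collection aliases ``A`` and ``B``, the server URL and the byte values (10GB and 3GB expressed as bytes) are hypothetical.

.. code-block:: bash

  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
  export SERVER_URL=https://demo.dataverse.org

  # Give the parent collection A a 10GB quota (10 * 2^30 bytes):
  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/A/storage/quota/10737418240"

  # Cap (and guarantee) 3GB for the sub-collection B (3 * 2^30 bytes):
  curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/B/storage/quota/3221225472"

  # Check the quota currently configured for B:
  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/B/storage/quota"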
+ diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index ac81aa737a7..633842044b4 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -27,6 +27,7 @@ This guide documents the functionality only available to superusers (such as "da solr-search-index ip-groups mail-groups + collectionquotas monitoring reporting-tools-and-queries maintenance diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 234d5f37232..7bd334f6a95 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -763,7 +763,8 @@ Collection Storage Quotas curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" -Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the collection. +Will output the storage quota allocated (in bytes), or a message indicating that the quota is not defined for the specific collection. The user identified by the API token must have the ``Manage`` permission on the collection. + To set or change the storage allocation quota for a collection: @@ -771,13 +772,22 @@ To set or change the storage allocation quota for a collection: curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota/$SIZE_IN_BYTES" +This is API is superuser-only. + + To delete a storage quota configured for a collection: .. code-block:: curl -X DELETE -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/storage/quota" +This is API is superuser-only. + +Use the ``/settings`` API to enable or disable the enforcement of storage quotas that are defined across the instance via the following setting. For example, + +.. 
code-block:: + curl -X PUT -d 'true' http://localhost:8080/api/admin/settings/:UseStorageQuotas Datasets diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java index 240fba1037d..b777736dc8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -7,10 +7,12 @@ import jakarta.persistence.GenerationType; import jakarta.persistence.GeneratedValue; import jakarta.persistence.Id; +import jakarta.persistence.Index; import jakarta.persistence.JoinColumn; import jakarta.persistence.NamedQueries; import jakarta.persistence.NamedQuery; import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; import java.io.Serializable; /** @@ -23,6 +25,7 @@ @NamedQuery(name = "StorageUse.incrementByteSizeByDvContainerId", query = "UPDATE StorageUse su SET su.sizeInBytes = su.sizeInBytes +:fileSize WHERE su.dvObjectContainer.id =:dvObjectId") }) @Entity +@Table(indexes = {@Index(columnList="dvobjectcontainer_id")}) public class StorageUse implements Serializable { private static final long serialVersionUID = 1L; From 9af23d23d97413338ce2b800697b19970aca3dd5 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 1 Dec 2023 17:23:09 -0500 Subject: [PATCH 304/546] add mixed/other dialogs for transfer case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 92 ++++++++++++------- src/main/java/propertyFiles/Bundle.properties | 6 +- src/main/webapp/dataset.xhtml | 48 ++++++++-- src/main/webapp/filesFragment.xhtml | 10 +- 4 files changed, 110 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 0b0d0a2e4f5..47a32987b0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -365,6 +365,7 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { */ private boolean versionHasTabular = false; private boolean versionHasGlobus = false; + private boolean globusTransferRequested = false; private boolean showIngestSuccess; @@ -3116,6 +3117,16 @@ public void setSelectedGlobusTransferableFiles(List selectedGlobus this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; } + private List selectedNonGlobusTransferableFiles; + + public List getSelectedNonGlobusTransferableFiles() { + return selectedNonGlobusTransferableFiles; + } + + public void setSelectedNonGlobusTransferableFiles(List selectedNonGlobusTransferableFiles) { + this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3227,7 +3238,7 @@ private void startDownload(boolean downloadOriginal){ boolean guestbookRequired = isDownloadPopupRequired(); boolean validate = validateFilesForDownload(downloadOriginal); if (validate) { - updateGuestbookResponse(guestbookRequired, downloadOriginal); + updateGuestbookResponse(guestbookRequired, downloadOriginal, false); if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){ startMultipleFileDownload(); } @@ -3289,8 +3300,9 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ return false; } - if (!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) - && !getSelectedNonDownloadableFiles().isEmpty()) { + //Some are selected and 
there are non-downloadable ones or there are both downloadable and globus transferable files + if ((!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty()) + && (!getSelectedNonDownloadableFiles().isEmpty()) || (!getSelectedDownloadableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("Mixed"); return true; } @@ -3302,7 +3314,7 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } - private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) { + private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) { // Note that the GuestbookResponse object may still have information from // the last download action performed by the user. For example, it may // still have the non-null Datafile in it, if the user has just downloaded @@ -3310,7 +3322,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // even if that's not what they are trying to do now. // So make sure to reset these values: guestbookResponse.setDataFile(null); - guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + if(isGlobusTransfer) { + guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); + } else { + guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + } if (downloadOriginal) { guestbookResponse.setFileFormat("original"); } else { @@ -3331,6 +3347,7 @@ private boolean filterSelectedFiles(){ setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); setSelectedGlobusTransferableFiles(new ArrayList<>()); + setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); @@ -3346,11 +3363,14 @@ private boolean filterSelectedFiles(){ if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; - } else if(globusTransferable) { + } else { + getSelectedNonDownloadableFiles().add(fmd); + } + if(globusTransferable) { getSelectedGlobusTransferableFiles().add(fmd); someFiles=true; } else { - getSelectedNonDownloadableFiles().add(fmd); + getSelectedNonGlobusTransferableFiles().add(fmd); } if(fmd.isRestricted()){ getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not @@ -6318,37 +6338,45 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer(boolean transferAll) { - if(transferAll) { + public boolean isGlobusTransferRequested() { + return globusTransferRequested; + } + + public void startGlobusTransfer(boolean transferAll, boolean popupShown) { + if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); } + boolean guestbookRequired = isDownloadPopupRequired(); + boolean validated = validateFilesForDownload(true); if (validated) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if 
(fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); + globusTransferRequested = true; + boolean mixed = "Mixed".equals(getValidateFilesOutcome()); + // transfer is + updateGuestbookResponse(guestbookRequired, true, true); + if ((!guestbookRequired && !mixed) || popupShown) { + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + if (fileMetadataForAction != null) { + List downloadFMList = new ArrayList(1); + downloadFMList.add(fileMetadataForAction); + PrimeFaces.current() + .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); } else { - // ToDo: For non-public, need the subset that are downloadable by the user - // ToDo: For mixed (some in backing store), need the ones in the globus store - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - workingVersion.getFileMetadatas())); + if (getSelectedGlobusTransferableFiles() != null) { + PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, + getSelectedGlobusTransferableFiles())); + } } + globusTransferRequested = false; } } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 13e3a675a27..65dd020f27b 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -64,6 +64,7 @@ manager=Manager curator=Curator explore=Explore download=Download +transfer=Globus Transfer downloadOriginal=Original Format downloadArchival=Archival Format (.tab) deaccession=Deaccession @@ -1391,6 +1392,7 @@ dataset.accessBtn.header.explore=Explore Options dataset.accessBtn.header.configure=Configure Options dataset.accessBtn.header.compute=Compute Options dataset.accessBtn.download.size=ZIP ({0}) +dataset.accessBtn.transfer.size=({0}) dataset.accessBtn.too.big=The dataset is too large to download. Please select the files you need from the files table. dataset.accessBtn.original.too.big=The dataset is too large to download in the original format. Please select the files you need from the files table. dataset.accessBtn.archival.too.big=The dataset is too large to download in the archival format. Please select the files you need from the files table. @@ -1655,8 +1657,10 @@ dataset.inValidSelectedFilesForDownloadWithEmbargo=Embargoed and/or Restricted F dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownload=The restricted file(s) selected may not be downloaded because you have not been granted access. dataset.mixedSelectedFilesForDownloadWithEmbargo=The embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. 
-
+dataset.mixedSelectedFilesForTransfer=Some file(s) cannot be transferred. (They are restricted, embargoed, or not Globus accessible.)
+dataset.inValidSelectedFilesForTransfer=Ineligible Files Selected
 dataset.downloadUnrestricted=Click Continue to download the files you have access to download.
+dataset.transferUnrestricted=Click Continue to transfer the eligible files.
 dataset.requestAccessToRestrictedFiles=You may request access to the restricted file(s) by clicking the Request Access button.
 dataset.requestAccessToRestrictedFilesWithEmbargo=Embargoed files cannot be accessed during the embargo period. If your selection contains restricted files, you may request access to them by clicking the Request Access button.
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml
index 0b8983a7770..e50e68ec162 100644
--- a/src/main/webapp/dataset.xhtml
+++ b/src/main/webapp/dataset.xhtml
@@ -178,7 +178,7 @@
  • + oncomplete="showPopup(false);"> #{bundle.download} @@ -192,7 +192,7 @@
  • #{bundle.downloadOriginal} @@ -208,7 +208,7 @@
  • - #{bundle.downloadArchival} @@ -230,9 +230,14 @@
  • - - - + + #{bundle.transfer} + + + + +
@@ -1095,6 +1100,28 @@
#{bundle['dataset.mixedSelectedFilesForTransfer']}
#{resFile.label}
#{bundle['dataset.transferUnrestricted']}
#{bundle['file.deleteDialog.tip']}
    @@ -1545,6 +1572,7 @@ + @@ -1911,10 +1939,14 @@ $('button[id$="updateOwnerDataverse"]').trigger('click'); } - function showPopup() { + function showPopup(isTransfer) { var outcome = document.getElementById("datasetForm:validateFilesOutcome").value; if (outcome ==='Mixed'){ - PF('downloadMixed').show(); + if(isTransfer) { + PF('globusTransferMixed').show(); + } else { + PF('downloadMixed').show(); + } } if (outcome ==='FailEmpty'){ PF('selectFilesForDownload').show(); diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 3d28e3170f7..58899ab7062 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -442,7 +442,7 @@ disabled="#{false and DatasetPage.lockedFromDownload}" onclick="if (!testFilesSelected()) return false;" action="#{DatasetPage.startDownloadSelectedOriginal()}" - update="@form" oncomplete="showPopup();"> + update="@form" oncomplete="showPopup(false);"> #{bundle.download} @@ -459,7 +459,7 @@
@@ -470,7 +470,7 @@
@@ -481,9 +481,9 @@
  • + actionListener="#{DatasetPage.startGlobusTransfer(false, false)}"> #{bundle['file.globus.transfer']} From 43105d31ae3d5357e450da3a98cac6886e18a1d3 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:14:28 -0500 Subject: [PATCH 305/546] refactor, handle guestbook at download case --- .../edu/harvard/iq/dataverse/DatasetPage.java | 29 ++----- .../iq/dataverse/FileDownloadHelper.java | 36 ++++---- .../iq/dataverse/GuestbookResponse.java | 2 +- .../dataverse/api/DownloadInstanceWriter.java | 6 +- .../dataverse/globus/GlobusServiceBean.java | 86 ++++++++++++++++--- .../guestbook-terms-popup-fragment.xhtml | 13 ++- 6 files changed, 115 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 47a32987b0b..830e146fa07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3321,7 +3321,11 @@ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloa // a single file; or it may still have the format set to "original" - // even if that's not what they are trying to do now. // So make sure to reset these values: - guestbookResponse.setDataFile(null); + if(fileMetadataForAction == null) { + guestbookResponse.setDataFile(null); + } else { + guestbookResponse.setDataFile(fileMetadataForAction.getDataFile()); + } if(isGlobusTransfer) { guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); } else { @@ -6355,27 +6359,8 @@ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { // transfer is updateGuestbookResponse(guestbookRequired, true, true); if ((!guestbookRequired && !mixed) || popupShown) { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } - if (fileMetadataForAction != null) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(fileMetadataForAction); - PrimeFaces.current() - .executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, downloadFMList)); - } else { - if (getSelectedGlobusTransferableFiles() != null) { - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken, - getSelectedGlobusTransferableFiles())); - } - } + boolean doNotSaveGuestbookResponse = workingVersion.isDraft(); + globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse); globusTransferRequested = false; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index a6ae7223d9d..4d8100124ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import 
edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -53,6 +54,9 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; + + @EJB + GlobusServiceBean globusService; private final Map fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean } @@ -60,32 +64,32 @@ public FileDownloadHelper() { this.filesForRequestAccess = new ArrayList<>(); } - // See also @Size(max = 255) in GuestbookResponse - private boolean testResponseLength(String value) { - return !(value != null && value.length() > 255); - } - // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) { + public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); // Note that this method is only ever called from the file-download-popup - // meaning we know for the fact that we DO want to save this // guestbookResponse permanently in the database. - if (guestbookResponse.getSelectedFileIds() != null) { - // this is a batch (multiple file) download. - // Although here's a chance that this is not really a batch download - i.e., - // there may only be one file on the file list. But the fileDownloadService - // method below will check for that, and will redirect to the single download, if - // that's the case. -- L.A. - fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); - } else if (guestbookResponse.getDataFile() != null) { - // this a single file download: - fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + if(isGlobusTransfer) { + globusService.writeGuestbookAndStartTransfer(guestbookResponse, true); + } else { + if (guestbookResponse.getSelectedFileIds() != null) { + // this is a batch (multiple file) download. + // Although here's a chance that this is not really a batch download - i.e., + // there may only be one file on the file list. But the fileDownloadService + // method below will check for that, and will redirect to the single download, + // if + // that's the case. -- L.A. 
+ fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); + } else if (guestbookResponse.getDataFile() != null) { + // this a single file download: + fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 976f1e084ac..9041ccf887c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -99,7 +99,7 @@ public class GuestbookResponse implements Serializable { */ public static final String ACCESS_REQUEST = "AccessRequest"; - static final String DOWNLOAD = "Download"; + public static final String DOWNLOAD = "Download"; static final String SUBSET = "Subset"; static final String EXPLORE = "Explore"; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index cc064976982..bcb8799ec9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -213,9 +213,9 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (di.getConversionParam().equals("format")) { if ("GlobusTransfer".equals(di.getConversionParamValue())) { - List downloadFMList = new ArrayList(1); - downloadFMList.add(dataFile.getFileMetadata()); - redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadFMList); + List downloadDFList = new ArrayList(1); + downloadDFList.add(dataFile); + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d8742fc90d5..0c991424ce9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -50,15 +50,19 @@ import java.util.stream.IntStream; import org.apache.commons.codec.binary.StringUtils; +import org.primefaces.PrimeFaces; import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -73,21 +77,22 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; - @EJB protected SettingsServiceBean settingsSvc; - @Inject DataverseSession session; - @EJB protected AuthenticationServiceBean authSvc; - @EJB EjbDataverseEngine commandEngine; - @EJB UserNotificationServiceBean userNotificationService; + @EJB + PrivateUrlServiceBean privateUrlService; + @EJB 
+ FileDownloadServiceBean fileDownloadService; + @EJB + DataFileServiceBean dataFileService; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -600,7 +605,7 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List fileMetadataList) { + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -629,10 +634,6 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList = new ArrayList(1); + downloadDFList.add(df); + if (!doNotSaveGuestbookResponse) { + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + PrimeFaces.current() + .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + } else { + //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); + List selectedFiles = new ArrayList(); + for (String idAsString : list) { + try { + Long fileId = Long.parseLong(idAsString); + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } + } catch (NumberFormatException nfe) { + logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + return; + } + + } + if (!selectedFiles.isEmpty()) { + //Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, + selectedFiles)); + } + } + } } diff --git a/src/main/webapp/guestbook-terms-popup-fragment.xhtml b/src/main/webapp/guestbook-terms-popup-fragment.xhtml index 34df0c79390..5948047d845 100644 --- a/src/main/webapp/guestbook-terms-popup-fragment.xhtml +++ b/src/main/webapp/guestbook-terms-popup-fragment.xhtml @@ -274,8 +274,17 @@ + + + + From a76158f5903ec73a78b284de90d6491a7e05bfce Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 13:35:33 -0500 Subject: [PATCH 306/546] suppress download entry when not accessible, refactor --- .../edu/harvard/iq/dataverse/DatasetPage.java | 2 +- .../harvard/iq/dataverse/SettingsWrapper.java | 22 +++ .../file-download-button-fragment.xhtml | 6 +- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ++++++++++++++++++ 4 files changed, 202 insertions(+), 4 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 830e146fa07..704c1d42228 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -3354,7 +3354,7 @@ private boolean filterSelectedFiles(){ setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; - boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); + boolean 
globusDownloadEnabled = settingsWrapper.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java index 8b7f732d03f..8ab1e87aef2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java @@ -6,6 +6,8 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; @@ -337,6 +339,26 @@ public boolean isGlobusEnabledStorageDriver(String driverId) { return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId)); } + public boolean isDownloadable(FileMetadata fmd) { + boolean downloadable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + + downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId); + } + return downloadable; + } + + public boolean isGlobusTransferable(FileMetadata fmd) { + boolean globusTransferable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + } + return globusTransferable; + } + + public String getGlobusAppUrl() { if (globusAppUrl == null) { globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 318aab1454e..9c29fd777a1 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -60,7 +60,7 @@ -
+
  • gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); + System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); + + System.out.println( + "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); + System.out.println( + "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); + System.setProperty("dataverse.files.globus.managed", "true"); + datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); + System.out.println("ValidLocal: " + + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); + + // We can read the storageIdentifier and get the driver + assertTrue(datafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); + // When we get a StorageIO for the file, it is the right type + StorageIO storageIO = DataAccess.getStorageIO(localDatafile); + assertTrue(storageIO instanceof GlobusOverlayAccessIO); + // When we use it, we can get properties like the remote store name + GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; + assertTrue( + globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); + + String location = globusIO.getStorageLocation(); + assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); +/* + // TBD: + // And can get a temporary download URL for the main file + String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + System.out.println(signedURL); + // And the URL starts with the right stuff + assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); + // And the signature is valid + // assertTrue( + // UrlSignerUtil.isValidUrl(signedURL, null, null, + // System.getProperty("dataverse.files.globus.secret-key"))); + // And we get an unsigned URL with the right stuff with no key + System.clearProperty("dataverse.files.globus.secret-key"); + String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); + assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); +*/ + // Once we've opened, we can get the file size (only works if the call to Globus + // works) + globusIO.open(DataAccessOption.READ_ACCESS); + assertTrue(globusIO.getSize() > 0); + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId + ".auxobject").toString()); + System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, + baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); + IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), + "Expected getStorageIO() to throw, but it didn't"); + // 'test' is the driverId in the IOException messages + assertTrue(thrown.getMessage().contains("globus")); + + } + + @Test + void testRemoteOverlayIdentifierFormats() throws IOException { + System.clearProperty("dataverse.files.globus.managed"); + datafile.setStorageIdentifier( + "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); + assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); + assertFalse( + DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); + assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + System.setProperty("dataverse.files.globus.managed", "true"); + assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); + + } + +} From 93a586727a3c00069699eb47e5ca5ca3ebbf91cf Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sat, 2 Dec 2023 17:58:45 -0500 Subject: [PATCH 307/546] remove old testing code --- .../dataaccess/GlobusOverlayAccessIO.java | 46 ----- .../dataaccess/GlobusOverlayAccessIOTest.java | 176 ------------------ 2 files changed, 222 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 3e72fa85d35..e825af8cf30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -408,52 +408,6 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException } - public static void main(String[] args) { - System.out.println("Running the main method"); - if (args.length > 0) { - System.out.printf("List of arguments: {}", Arrays.toString(args)); - } - System.setProperty("dataverse.files.globus.base-url", "globus://d8c42580-6528-4605-9ad8-116a61982644"); - System.out.println("NotValid: " + isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println("ValidRemote: " + isValidIdentifier("globus", "globus://localid//of/the/hill")); - 
System.setProperty("dataverse.files.globus.managed", "true"); - - System.out.println("ValidLocal: " + isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - System.setProperty("dataverse.files.globus.globus-token", - ""); - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - // logger.info(JvmSettings.BASE_URL.lookup("globus")); - // logger.info(JvmSettings.GLOBUS_TOKEN.lookup("globus")); - - try { - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO( - "globus://1234///hdc1/image001.mrc", "globus"); - logger.info("Size is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - try { - DataFile df = new DataFile(); - Dataset ds = new Dataset(); - ds.setAuthority("10.5072"); - ds.setIdentifier("FK21234"); - df.setOwner(ds); - df.setStorageIdentifier("globus://1234///hdc1/image001.mrc"); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(df, null, "globus"); - logger.info("Size2 is " + gsio.retrieveSizeFromMedia()); - - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - } - - @Override public void open(DataAccessOption... option) throws IOException { // TODO Auto-generated method stub diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index bf3bcdbfe8e..e69de29bb2d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -1,176 +0,0 @@ -/* - * Copyright 2018 Forschungszentrum Jülich GmbH - * SPDX-License-Identifier: Apache 2.0 - */ -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DOIServiceBean; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.mocks.MocksFactory; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.util.UrlSignerUtil; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import static org.junit.jupiter.api.Assertions.*; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; -import java.io.IOException; -import java.nio.file.Paths; - -@ExtendWith(MockitoExtension.class) -@MockitoSettings(strictness = Strictness.STRICT_STUBS) -public class GlobusOverlayAccessIOTest { - - @Mock - - private Dataset dataset; - private DataFile datafile; - private DataFile localDatafile; - private String baseStoreId = "182ad2bda2f-c3508e719076"; - private String logoPath = "image002.mrc"; - private String authority = "10.5072"; - private String identifier = "F2ABCDEF"; - - @BeforeEach - public void setUp() { - System.setProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - System.setProperty("dataverse.files.globus." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, - "d8c42580-6528-4605-9ad8-116a61982644/hdc1"); - - System.setProperty("dataverse.files.globus.globus-token", - "YTVlNzFjNzItYWVkYi00Mzg4LTkzNWQtY2NhM2IyODI2MzdmOnErQXRBeWNEMVM3amFWVnB0RlFnRk5zMTc3OFdDa3lGeVZPT3k0RDFpaXM9"); - System.setProperty("dataverse.files.globus.remote-store-name", "GlobusEndpoint1"); - System.setProperty("dataverse.files.globus.type", "globus"); - - System.setProperty("dataverse.files.globus.managed", "true"); - - System.setProperty("dataverse.files.globus.base-store", "file"); - System.setProperty("dataverse.files.file.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - System.setProperty("dataverse.files.file.directory", "/tmp/files"); - - // System.setProperty("dataverse.files.test.type", "remote"); - System.setProperty("dataverse.files.globus.label", "globusTest"); - System.setProperty("dataverse.files.test.base-url", "https://demo.dataverse.org/resources"); - System.setProperty("dataverse.files.test.base-store", "file"); - System.setProperty("dataverse.files.test.download-redirect", "true"); - System.setProperty("dataverse.files.test.remote-store-name", "DemoDataCorp"); - System.setProperty("dataverse.files.globus.secret-key", "12345"); // Real keys should be much longer, more - // random - System.setProperty("dataverse.files.file.type", "file"); - System.setProperty("dataverse.files.file.label", "default"); - datafile = MocksFactory.makeDataFile(); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - datafile.setOwner(dataset); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - - localDatafile = MocksFactory.makeDataFile(); - localDatafile.setOwner(dataset); - localDatafile.setStorageIdentifier("globus://" + baseStoreId); - } - - @AfterEach - public void tearDown() { - System.clearProperty("dataverse.files.test.type"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.base-url"); - System.clearProperty("dataverse.files.test.base-store"); - System.clearProperty("dataverse.files.test.download-redirect"); - System.clearProperty("dataverse.files.test.label"); - System.clearProperty("dataverse.files.test.remote-store-name"); - System.clearProperty("dataverse.files.test.secret-key"); - System.clearProperty("dataverse.files.file.type"); - System.clearProperty("dataverse.files.file.label"); - } - - @Test - void testGlobusOverlayFiles() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - GlobusOverlayAccessIO gsio = new GlobusOverlayAccessIO(datafile, null, "globus"); - System.out.println("Size2 is " + gsio.retrieveSizeFromMedia()); - - System.out.println( - "NotValid: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//../of/the/hill")); - System.out.println( - "ValidRemote: " + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://localid//of/the/hill")); - System.setProperty("dataverse.files.globus.managed", "true"); - datafile.setStorageIdentifier("globus://" + baseStoreId + "//" + logoPath); - System.out.println("ValidLocal: " - + GlobusOverlayAccessIO.isValidIdentifier("globus", "globus://176e28068b0-1c3f80357c42")); - - // We can read the storageIdentifier and get the driver - 
assertTrue(datafile.getStorageIdentifier() - .startsWith(DataAccess.getStorageDriverFromIdentifier(datafile.getStorageIdentifier()))); - // We can get the driver type from it's ID - assertTrue(DataAccess.getDriverType("globus").equals(System.getProperty("dataverse.files.globus.type"))); - // When we get a StorageIO for the file, it is the right type - StorageIO storageIO = DataAccess.getStorageIO(localDatafile); - assertTrue(storageIO instanceof GlobusOverlayAccessIO); - // When we use it, we can get properties like the remote store name - GlobusOverlayAccessIO globusIO = (GlobusOverlayAccessIO) storageIO; - assertTrue( - globusIO.getRemoteStoreName().equals(System.getProperty("dataverse.files.globus.remote-store-name"))); - - String location = globusIO.getStorageLocation(); - assertEquals("globus:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/" + baseStoreId, location); -/* - // TBD: - // And can get a temporary download URL for the main file - String signedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - System.out.println(signedURL); - // And the URL starts with the right stuff - assertTrue(signedURL.startsWith(System.getProperty("dataverse.files.globus." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH) + "/" + logoPath)); - // And the signature is valid - // assertTrue( - // UrlSignerUtil.isValidUrl(signedURL, null, null, - // System.getProperty("dataverse.files.globus.secret-key"))); - // And we get an unsigned URL with the right stuff with no key - System.clearProperty("dataverse.files.globus.secret-key"); - String unsignedURL = globusIO.generateTemporaryDownloadUrl(null, null, null); - assertTrue(unsignedURL.equals(System.getProperty("dataverse.files.globus.base-url") + "/" + logoPath)); -*/ - // Once we've opened, we can get the file size (only works if the call to Globus - // works) - globusIO.open(DataAccessOption.READ_ACCESS); - assertTrue(globusIO.getSize() > 0); - // If we ask for the path for an aux file, it is correct - System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, - identifier, baseStoreId + ".auxobject").toString()); - System.out.println(globusIO.getAuxObjectAsPath("auxobject").toString()); - assertTrue(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, identifier, - baseStoreId + ".auxobject").equals(globusIO.getAuxObjectAsPath("auxobject"))); - IOException thrown = assertThrows(IOException.class, () -> DataAccess.getStorageIO(localDatafile), - "Expected getStorageIO() to throw, but it didn't"); - // 'test' is the driverId in the IOException messages - assertTrue(thrown.getMessage().contains("globus")); - - } - - @Test - void testRemoteOverlayIdentifierFormats() throws IOException { - System.clearProperty("dataverse.files.globus.managed"); - datafile.setStorageIdentifier( - "globus://" + baseStoreId + "//d8c42580-6528-4605-9ad8-116a61982644/hdc1/" + logoPath); - assertTrue(DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier())); - assertFalse( - DataAccess.isValidDirectStorageIdentifier(datafile.getStorageIdentifier().replace("globus", "bad"))); - assertFalse(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - System.setProperty("dataverse.files.globus.managed", "true"); - assertTrue(DataAccess.isValidDirectStorageIdentifier(localDatafile.getStorageIdentifier())); - - } - -} From 1a96c566bccdf32aefeaca89898a3746b146fa08 Mon Sep 17 00:00:00 2001 From: 
Leonid Andreev Date: Sun, 3 Dec 2023 18:57:59 -0500 Subject: [PATCH 308/546] the kill switch for the real-time storageuse updates (just in case) and some related documentation (#8549) --- .../source/admin/collectionquotas.rst | 2 + .../source/installation/config.rst | 5 ++ .../dataverse/ingest/IngestServiceBean.java | 2 +- .../iq/dataverse/settings/JvmSettings.java | 3 + .../storageuse/StorageUseServiceBean.java | 58 ++++++------------- 5 files changed, 30 insertions(+), 40 deletions(-) diff --git a/doc/sphinx-guides/source/admin/collectionquotas.rst b/doc/sphinx-guides/source/admin/collectionquotas.rst index 883b6cf0c93..2ce3132e2ba 100644 --- a/doc/sphinx-guides/source/admin/collectionquotas.rst +++ b/doc/sphinx-guides/source/admin/collectionquotas.rst @@ -1,3 +1,4 @@ + Storage Quotas for Collections ============================== @@ -15,3 +16,4 @@ Please note that only the sizes of the main datafiles and the archival tab-delim When quotas are set and enforced, the users will be informed of the remaining storage allocation on the file upload page together with other upload and processing limits. +Part of the new and experimental nature of this feature is that we don't know for the fact yet how well it will function in real life on a very busy production system, despite our best efforts to test it prior to the release. One specific issue is having to update the recorded storage use for every parent collection of the given dataset whenever new files are added. This includes updating the combined size of the root, top collection - which will need to be updated after *every* file upload. In an unlikely case that this will start causing problems with race conditions and database update conflicts, it is possible to disable these updates (and thus disable the storage quotas feature), by setting the :ref:`dataverse.storageuse.disable-storageuse-increments` JVM setting to true. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 52ba35376ac..03eeff9dbb6 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,11 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +dataverse.storageuse.disable-storageuse-increments +++++++++++++++++++++++++++++++++++++++++++++++++++ + +This setting serves the role of an emergency "kill switch" that will disable maintaining the real time record of storage use for all the datasets and collections in the database. Because of the experimental nature of this feature (see :doc:`/admin/collectionquotas`) that hasn't been used in production setting as of this release, v6.1 this setting is provided in case these updates start causing database race conditions and conflicts on a busy server. + dataverse.auth.oidc.* +++++++++++++++++++++ diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 5efb4c06f48..233f746fb17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -206,7 +206,7 @@ public List saveAndAddFilesToDataset(DatasetVersion version, boolean unattached = false; boolean savedSuccess = false; if (dataFile.getOwner() == null) { - // is it ever "unattached"? + // is it ever "attached"? 
// do we ever call this method with dataFile.getOwner() != null? // - we really shouldn't be, either. unattached = true; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..7c65bba77d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -150,6 +150,9 @@ public enum JvmSettings { SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // STORAGE USE SETTINGS + SCOPE_STORAGEUSE(PREFIX, "storageuse"), + STORAGEUSE_DISABLE_UPDATES(SCOPE_STORAGEUSE, "disable-storageuse-increments"), ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index b542a7cd661..18e4ef49640 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,12 +1,14 @@ package edu.harvard.iq.dataverse.storageuse; import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import java.util.Optional; import java.util.logging.Logger; /** @@ -37,31 +39,6 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { return res == null ? 0L : res; } - public void incrementStorageSizeHierarchy(DvObjectContainer dvObject, Long filesize) { - incrementStorageSize(dvObject, filesize); - DvObjectContainer parent = dvObject.getOwner(); - while (parent != null) { - incrementStorageSize(parent, filesize); - parent = parent.getOwner(); - } - } - - /** - * @param dvObject - * @param filesize - */ - public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { - StorageUse dvContainerSU = findByDvContainerId(dvObject.getId()); - if (dvContainerSU != null) { - // @todo: named query - dvContainerSU.incrementSizeInBytes(filesize); - em.merge(dvContainerSU); - } else { - dvContainerSU = new StorageUse(dvObject, filesize); - em.persist(dvContainerSU); - } - } - /** * Increments the recorded storage size for all the dvobject parents of a * datafile, recursively. 
@@ -71,20 +48,23 @@ public void incrementStorageSize(DvObjectContainer dvObject, Long filesize) { @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { //@todo should throw exceptions if either parameter is null - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; - - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; + + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? } From 0a536da0c42ed9654641985f1fd8dc20b461c16c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 09:46:59 -0500 Subject: [PATCH 309/546] a missing ref in the doc. #8549 --- doc/sphinx-guides/source/installation/config.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 03eeff9dbb6..7cb321708a7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2489,6 +2489,8 @@ This setting was added to keep S3 direct upload lightweight. When that feature i See also :ref:`s3-direct-upload-features-disabled`. +.. _dataverse.storageuse.disable-storageuse-increments: + dataverse.storageuse.disable-storageuse-increments ++++++++++++++++++++++++++++++++++++++++++++++++++ From b20f198368615d7d8c4e798a25d6f68a6d0c4ed9 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 11:27:27 -0500 Subject: [PATCH 310/546] Bump version to 6.1 --- doc/sphinx-guides/source/conf.py | 4 ++-- doc/sphinx-guides/source/versions.rst | 3 ++- modules/dataverse-parent/pom.xml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index 0660ec3b071..64efc359e9a 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -66,9 +66,9 @@ # built documents. # # The short X.Y version. -version = '6.0' +version = '6.1' # The full version, including alpha/beta/rc tags. -release = '6.0' +release = '6.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 2000a2097f0..2cf7f46dc5e 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` -- 6.0 +- 6.1 +- `6.0 `__ - `5.14 `__ - `5.13 `__ - `5.12.1 `__ diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index db0fa46a952..7b305cad581 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -131,7 +131,7 @@ - 6.0 + 6.1 17 UTF-8 From 5f29144762c166c7856958497e24f629d53c92a0 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 12:58:01 -0500 Subject: [PATCH 311/546] adding 6.1 release notes and removing .md files --- ...001-datasets-files-api-user-permissions.md | 13 -- doc/release-notes/10060-api-changelog.md | 3 - .../10093-signedUrl_improvements.md | 5 - .../10104-dataset-citation-deaccessioned.md | 1 - doc/release-notes/6.1-release-notes.md | 195 ++++++++++++++++++ .../9268-8349-oidc-improvements.md | 43 ---- doc/release-notes/9412-markdown-previewer.md | 1 - doc/release-notes/9428-alternative-title.md | 9 - doc/release-notes/9589-ds-configure-tool.md | 1 - doc/release-notes/9590-intellij-redeploy.md | 3 - .../9599-guestbook-at-request.md | 2 - doc/release-notes/9635-solr-improvements.md | 4 - doc/release-notes/9692-files-api-extension.md | 7 - .../9714-files-api-extension-filters.md | 14 -- .../9763-versions-api-improvements.md | 8 - .../9785-files-api-extension-search-text.md | 3 - .../9834-files-api-extension-counts.md | 6 - ...oad-extension-new-file-access-endpoints.md | 14 -- .../9852-files-api-extension-deaccession.md | 12 -- .../9880-info-api-zip-limit-embargo.md | 5 - .../9907-files-api-counts-with-criteria.md | 11 - doc/release-notes/9955-Signposting-updates.md | 7 - ...et-api-downloadsize-ignore-tabular-size.md | 9 - .../9972-files-api-filter-by-tabular-tags.md | 3 - ...with-criteria-and-deaccessioned-support.md | 12 -- 25 files changed, 195 insertions(+), 196 deletions(-) delete mode 100644 doc/release-notes/10001-datasets-files-api-user-permissions.md delete mode 100644 doc/release-notes/10060-api-changelog.md delete mode 100644 doc/release-notes/10093-signedUrl_improvements.md delete mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md create mode 100644 doc/release-notes/6.1-release-notes.md delete mode 100644 doc/release-notes/9268-8349-oidc-improvements.md delete mode 100644 doc/release-notes/9412-markdown-previewer.md delete mode 100644 doc/release-notes/9428-alternative-title.md delete mode 100644 doc/release-notes/9589-ds-configure-tool.md delete mode 100644 doc/release-notes/9590-intellij-redeploy.md delete mode 100644 doc/release-notes/9599-guestbook-at-request.md delete mode 100644 doc/release-notes/9635-solr-improvements.md delete mode 100644 doc/release-notes/9692-files-api-extension.md delete mode 100644 doc/release-notes/9714-files-api-extension-filters.md delete mode 100644 doc/release-notes/9763-versions-api-improvements.md delete mode 100644 doc/release-notes/9785-files-api-extension-search-text.md delete mode 
100644 doc/release-notes/9834-files-api-extension-counts.md delete mode 100644 doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md delete mode 100644 doc/release-notes/9852-files-api-extension-deaccession.md delete mode 100644 doc/release-notes/9880-info-api-zip-limit-embargo.md delete mode 100644 doc/release-notes/9907-files-api-counts-with-criteria.md delete mode 100644 doc/release-notes/9955-Signposting-updates.md delete mode 100644 doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md delete mode 100644 doc/release-notes/9972-files-api-filter-by-tabular-tags.md delete mode 100644 doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md diff --git a/doc/release-notes/10001-datasets-files-api-user-permissions.md b/doc/release-notes/10001-datasets-files-api-user-permissions.md deleted file mode 100644 index 0aa75f9218a..00000000000 --- a/doc/release-notes/10001-datasets-files-api-user-permissions.md +++ /dev/null @@ -1,13 +0,0 @@ -- New query parameter `includeDeaccessioned` added to the getVersion endpoint (/api/datasets/{id}/versions/{versionId}) to consider deaccessioned versions when searching for versions. - - -- New endpoint to get user permissions on a dataset (/api/datasets/{id}/userPermissions). In particular, the user permissions that this API call checks, returned as booleans, are the following: - - - Can view the unpublished dataset - - Can edit the dataset - - Can publish the dataset - - Can manage the dataset permissions - - Can delete the dataset draft - - -- New permission check "canManageFilePermissions" added to the existing endpoint for getting user permissions on a file (/api/access/datafile/{id}/userPermissions). \ No newline at end of file diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md deleted file mode 100644 index 56ac96e3564..00000000000 --- a/doc/release-notes/10060-api-changelog.md +++ /dev/null @@ -1,3 +0,0 @@ -We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html - -See also #10060. diff --git a/doc/release-notes/10093-signedUrl_improvements.md b/doc/release-notes/10093-signedUrl_improvements.md deleted file mode 100644 index 26a17c65e3f..00000000000 --- a/doc/release-notes/10093-signedUrl_improvements.md +++ /dev/null @@ -1,5 +0,0 @@ -A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - -SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - -Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
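The note deleted above (its content is folded into the consolidated 6.1 release notes later in this patch) documents the new dataset user-permissions endpoint, which returns a set of booleans. A minimal sketch of calling it with Java's built-in HTTP client; the host, dataset id, and token are placeholders, and the exact JSON key names are not spelled out in the note:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of GET /api/datasets/{id}/userPermissions as described in the deleted note above.
    // "http://localhost:8080", dataset id 42, and the token value are placeholders.
    public class UserPermissionsCheck {
        public static void main(String[] args) throws Exception {
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/api/datasets/42/userPermissions"))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // The body reports whether the caller can view the unpublished dataset, edit, publish,
            // manage permissions, and delete the draft; just print it here.
            System.out.println(response.statusCode() + " " + response.body());
        }
    }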
diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md deleted file mode 100644 index 0ba06d729c4..00000000000 --- a/doc/release-notes/10104-dataset-citation-deaccessioned.md +++ /dev/null @@ -1 +0,0 @@ -The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md new file mode 100644 index 00000000000..c2b52ab34b8 --- /dev/null +++ b/doc/release-notes/6.1-release-notes.md @@ -0,0 +1,195 @@ +# Dataverse 6.1 + +(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) + +This release brings new features, enhancements, and bug fixes to the Dataverse software. +Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights (Major Upgrades, Breaking Changes) + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +## Detailed Release Highlights, New Features and Use Case Scenarios + +### Dataverse installation can be now be configured to allow out-of-band upload +- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + +### Alternative Title is made repeatable. +- One will need to update database with updated citation block. + `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` +- One will also need to update solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` + Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. 
+For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] + +### Improvements in the /versions API +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +### The following API endpoints have been added: + +- /api/files/{id}/downloadCount +- /api/files/{id}/dataTables +- /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. +- canManageFilePermissions (/access/datafile/{id}/userPermissions) Added for getting user permissions on a file. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). +- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. +- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. +- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). +- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. +- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. + +### Extended the existing endpoints: +- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. 
Added support for filtering by optional criteria query parameter: + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText +- getDownloadSize ("api/datasets/{identifier}/versions/{versionId}/downloadsize"): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. Added a new optional query parameter "mode" +This parameter applies a filter criteria to the operation and supports the following values: + - All (Default): Includes both archival and original sizes for tabular files + - Archival: Includes only the archival size for tabular files + - Original: Includes only the original size for tabular files. +- /api/datasets/{id}/versions/{versionId} New query parameter `includeDeaccessioned` added to consider deaccessioned versions when searching for versions. +- /api/datasets/{id}/userPermissions Get user permissions on a dataset, in particular, the user permissions that this API call checks, returned as booleans, are the following: + - Can view the unpublished dataset + - Can edit the dataset + - Can publish the dataset + - Can manage the dataset permissions + - Can delete the dataset draft +- getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. + + +### DataFile API payload has been extended to include the following fields: +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + + +### Misc +- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + +- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports +(which include the OAI-ORE metadata export file) have been updated to include +information about the dataset version state, e.g. RELEASED or DEACCESSIONED +and to indicate which version of Dataverse was used to create the archival Bag. 
+As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 +version designation and it is expected that any future changes to the OAI_ORE export +format will result in a version change and that tools such as DVUploader that can +recreate datasets from archival Bags will start indicating which version(s) of the +OAI_ORE format they can read. +Dataverse installations that have been using archival Bags may wish to update any +existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse +[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +to generate updated versions. + +- This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. + - To fix #9952, we surround the license info with `<` and `>`. + - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information + - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. + +- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +See also #10060. + +### Solr Improvements +- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. + +Please see the "Installing Solr" section of the Installation Prerequisites guide. + + +### Development +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools + +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews + +- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. + - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. + - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. + +## OpenID Connect Authentication Provider Improvements + +### Using MicroProfile Config For Provisioning + +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
+ +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. + +### Adding PKCE Support + +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +## Improved Testing + +With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests +like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition +of both OIDC login options in UI and API. + +The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. + +The support for setting JVM options during testing has been improved for developers. You now may add the +`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. + +As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. + +## New Configuration Options + +- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.client-id +- dataverse.auth.oidc.client-secret +- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.method +- dataverse.auth.oidc.title +- dataverse.auth.oidc.subtitle +- dataverse.auth.oidc.pkce.max-cache-size +- dataverse.auth.oidc.pkce.max-cache-age + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. 
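Several of the endpoints listed in the new release notes above accept optional query parameters (includeDeaccessioned plus the contentType, accessStatus, categoryName, tabularTagName, and searchText criteria). A minimal sketch of one such call, getVersionFileCounts, using Java's built-in HTTP client; all concrete values are placeholders and the ":latest-published" version label is an assumption:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of GET /api/datasets/{id}/versions/{versionId}/files/counts with two of the optional
    // query parameters named in the notes above. Host, ids, version label, and token are placeholders.
    public class VersionFileCounts {
        public static void main(String[] args) throws Exception {
            String url = "http://localhost:8080/api/datasets/42/versions/:latest-published"
                    + "/files/counts?includeDeaccessioned=true&contentType=image/png";
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body()); // counts per content type, access status, category, ...
        }
    }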
diff --git a/doc/release-notes/9268-8349-oidc-improvements.md b/doc/release-notes/9268-8349-oidc-improvements.md deleted file mode 100644 index ddfc13e603c..00000000000 --- a/doc/release-notes/9268-8349-oidc-improvements.md +++ /dev/null @@ -1,43 +0,0 @@ -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support - -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. - -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options - -- dataverse.auth.oidc.enabled -- dataverse.auth.oidc.client-id -- dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url -- dataverse.auth.oidc.pkce.enabled -- dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title -- dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age diff --git a/doc/release-notes/9412-markdown-previewer.md b/doc/release-notes/9412-markdown-previewer.md deleted file mode 100644 index 8faa2679fb0..00000000000 --- a/doc/release-notes/9412-markdown-previewer.md +++ /dev/null @@ -1 +0,0 @@ -There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md deleted file mode 100644 index 3bc74f218b5..00000000000 --- a/doc/release-notes/9428-alternative-title.md +++ /dev/null @@ -1,9 +0,0 @@ -Alternative Title is made repeatable. -- One will need to update database with updated citation block. 
-`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: -Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` -Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` - -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] diff --git a/doc/release-notes/9589-ds-configure-tool.md b/doc/release-notes/9589-ds-configure-tool.md deleted file mode 100644 index 70ac5fcaa6a..00000000000 --- a/doc/release-notes/9589-ds-configure-tool.md +++ /dev/null @@ -1 +0,0 @@ -Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. diff --git a/doc/release-notes/9590-intellij-redeploy.md b/doc/release-notes/9590-intellij-redeploy.md deleted file mode 100644 index 07af352ece4..00000000000 --- a/doc/release-notes/9590-intellij-redeploy.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. - -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools diff --git a/doc/release-notes/9599-guestbook-at-request.md b/doc/release-notes/9599-guestbook-at-request.md deleted file mode 100644 index e9554b71fb4..00000000000 --- a/doc/release-notes/9599-guestbook-at-request.md +++ /dev/null @@ -1,2 +0,0 @@ -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. diff --git a/doc/release-notes/9635-solr-improvements.md b/doc/release-notes/9635-solr-improvements.md deleted file mode 100644 index ad55ee3afe6..00000000000 --- a/doc/release-notes/9635-solr-improvements.md +++ /dev/null @@ -1,4 +0,0 @@ -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. 
- diff --git a/doc/release-notes/9692-files-api-extension.md b/doc/release-notes/9692-files-api-extension.md deleted file mode 100644 index baa8e2f87cd..00000000000 --- a/doc/release-notes/9692-files-api-extension.md +++ /dev/null @@ -1,7 +0,0 @@ -The following API endpoints have been added: - -- /api/files/{id}/downloadCount -- /api/files/{id}/dataTables -- /access/datafile/{id}/userPermissions - -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination and ordering diff --git a/doc/release-notes/9714-files-api-extension-filters.md b/doc/release-notes/9714-files-api-extension-filters.md deleted file mode 100644 index 034230efe61..00000000000 --- a/doc/release-notes/9714-files-api-extension-filters.md +++ /dev/null @@ -1,14 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by: - -- Access status: through the `accessStatus` query parameter, which supports the following values: - - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic - - -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. - - -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md deleted file mode 100644 index 8d7f6c7a20a..00000000000 --- a/doc/release-notes/9763-versions-api-improvements.md +++ /dev/null @@ -1,8 +0,0 @@ -# Improvements in the /versions API - -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. - -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. - diff --git a/doc/release-notes/9785-files-api-extension-search-text.md b/doc/release-notes/9785-files-api-extension-search-text.md deleted file mode 100644 index fb185e1c7af..00000000000 --- a/doc/release-notes/9785-files-api-extension-search-text.md +++ /dev/null @@ -1,3 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by search text through the `searchText` query parameter. - -The search will be applied to the labels and descriptions of the dataset files. diff --git a/doc/release-notes/9834-files-api-extension-counts.md b/doc/release-notes/9834-files-api-extension-counts.md deleted file mode 100644 index 3ec15d8bd36..00000000000 --- a/doc/release-notes/9834-files-api-extension-counts.md +++ /dev/null @@ -1,6 +0,0 @@ -Implemented the following new endpoints: - -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). 
- - -- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. diff --git a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md b/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md deleted file mode 100644 index f306ae2ab80..00000000000 --- a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md +++ /dev/null @@ -1,14 +0,0 @@ -Implemented the following new endpoints: - -- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - - -- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. - - -In addition, the DataFile API payload has been extended to include the following fields: - -- tabularData: Boolean field to know if the DataFile is of tabular type - - -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) diff --git a/doc/release-notes/9852-files-api-extension-deaccession.md b/doc/release-notes/9852-files-api-extension-deaccession.md deleted file mode 100644 index 55698580e3c..00000000000 --- a/doc/release-notes/9852-files-api-extension-deaccession.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the existing endpoints: - -- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts) - -The above endpoints now accept a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files or file counts. - -Additionally, a new endpoint has been developed to support version deaccessioning through API (Given a dataset and a version). - -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession) - -Finally, the DataFile API payload has been extended to add the field "friendlyType" diff --git a/doc/release-notes/9880-info-api-zip-limit-embargo.md b/doc/release-notes/9880-info-api-zip-limit-embargo.md deleted file mode 100644 index d2afb139e72..00000000000 --- a/doc/release-notes/9880-info-api-zip-limit-embargo.md +++ /dev/null @@ -1,5 +0,0 @@ -Implemented the following new endpoints: - -- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - -- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. diff --git a/doc/release-notes/9907-files-api-counts-with-criteria.md b/doc/release-notes/9907-files-api-counts-with-criteria.md deleted file mode 100644 index 07cd23daad0..00000000000 --- a/doc/release-notes/9907-files-api-counts-with-criteria.md +++ /dev/null @@ -1,11 +0,0 @@ -Extended the getVersionFileCounts endpoint (/api/datasets/{id}/versions/{versionId}/files/counts) to support filtering by criteria. 
- -In particular, the endpoint now accepts the following optional criteria query parameters: - -- contentType -- accessStatus -- categoryName -- tabularTagName -- searchText - -This filtering criteria is the same as the one for the getVersionFiles endpoint. diff --git a/doc/release-notes/9955-Signposting-updates.md b/doc/release-notes/9955-Signposting-updates.md deleted file mode 100644 index db0e27e51c5..00000000000 --- a/doc/release-notes/9955-Signposting-updates.md +++ /dev/null @@ -1,7 +0,0 @@ -This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - -To fix #9952, we surround the license info with `<` and `>`. - -To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - -To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. diff --git a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md b/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md deleted file mode 100644 index 2ede679b361..00000000000 --- a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md +++ /dev/null @@ -1,9 +0,0 @@ -Added a new optional query parameter "mode" to the "getDownloadSize" API endpoint ("api/datasets/{identifier}/versions/{versionId}/downloadsize"). - -This parameter applies a filter criteria to the operation and supports the following values: - -- All (Default): Includes both archival and original sizes for tabular files - -- Archival: Includes only the archival size for tabular files - -- Original: Includes only the original size for tabular files diff --git a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md b/doc/release-notes/9972-files-api-filter-by-tabular-tags.md deleted file mode 100644 index 9c3fced1741..00000000000 --- a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md +++ /dev/null @@ -1,3 +0,0 @@ -- New query parameter `tabularTagName` added to the getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) to return files to which the particular tabular tag has been added. - -- New endpoint to set tabular file tags via API: /api/files/{id}/metadata/tabularTags. diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md deleted file mode 100644 index 020224b2094..00000000000 --- a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: - -- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. - - -- The endpoint now supports filtering by criteria. 
In particular, it accepts the following optional criteria query parameters: - - - contentType - - accessStatus - - categoryName - - tabularTagName - - searchText From b077d98a11e6957085757c54c48030ef33b50c30 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 13:30:03 -0500 Subject: [PATCH 312/546] doc update, release note --- doc/release-notes/10162-globus-support.md | 14 ++++++++++++++ .../source/developers/big-data-support.rst | 7 +++++-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md new file mode 100644 index 00000000000..d64e72b70a1 --- /dev/null +++ b/doc/release-notes/10162-globus-support.md @@ -0,0 +1,14 @@ +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward Incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM Options: +- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index d38f7f27a68..fe49f9f6150 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -152,8 +152,6 @@ Note: Globus file transfer is still experimental but feedback is welcome! See :r Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured. 
-Due to differences in the access control models of a Dataverse installation and Globus, enabling the Globus capability on a store will disable the ability to restrict and embargo files in that store. - Globus endpoints can be in a variety of places, from data centers to personal computers. This means that from within the Dataverse software, a Globus transfer can feel like an upload or a download (with Globus Personal Connect running on your laptop, for example) or it can feel like a true transfer from one server to another (from a cluster in a data center into a Dataverse dataset or vice versa). @@ -162,11 +160,16 @@ Globus transfer uses an efficient transfer mechanism and has additional features * robust file transfer capable of restarting after network or endpoint failures * third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation +Note: Due to differences in the access control models of a Dataverse installation and Globus and the current Globus store model, Dataverse cannot enforce per-file-access restrictions. +It is therefore recommended that a store be configured as public, which disables the ability to restrict and embargo files in that store, when Globus access is allowed. + Dataverse supports three options for using Globus, two involving transfer to Dataverse-managed endpoints and one allowing Dataverse to reference files on remote endpoints. Dataverse-managed endpoints must be Globus 'guest collections' hosted on either a file-system-based endpoint or an S3-based endpoint (the latter requires use of the Globus S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) + For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access).
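The Datasets API changes later in this patch series describe the transfer-in flow behind the managed cases above: the dataverse-globus app first fetches upload parameters, then asks Dataverse to grant a Globus principal access and hand back storage paths, and finally reports the Globus task id via addGlobusFiles so Dataverse can monitor the transfer and register the files. A minimal sketch of the middle call; the "principal" and "numberOfFiles" keys come from that commit's javadoc, while the URL path (following the requestGlobusUploadPaths naming used there), host, ids, and token are assumptions/placeholders to verify against the guides:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    // Sketch of the "request upload paths / grant permission" step of the Globus upload flow.
    // The endpoint path is an assumption based on the requestGlobusUploadPaths naming in the
    // javadoc; the principal id, dataset id, host, and token are placeholders.
    public class RequestGlobusUpload {
        public static void main(String[] args) throws Exception {
            String body = "{\"principal\":\"globus-identity-id-placeholder\",\"numberOfFiles\":2}";
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/api/datasets/42/requestGlobusUploadPaths"))
                    .header("X-Dataverse-key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // The response maps the planned files to endpoint/path locations; after the Globus
            // transfer is started, addGlobusFiles is called with the resulting taskIdentifier.
            System.out.println(response.body());
        }
    }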
From 547d71c342e08ebdf674d8754dc072465ad20651 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 14:31:07 -0500 Subject: [PATCH 313/546] #9464 add more detail to validation error message --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index ed46caf65a1..027e58d9263 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1072,7 +1072,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From fc3ae08ec9335ac857af4d9c112e892255ef1c7a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 14:44:00 -0500 Subject: [PATCH 314/546] adding documentation --- .../edu/harvard/iq/dataverse/DatasetPage.java | 21 ++ .../harvard/iq/dataverse/api/Datasets.java | 238 +++++++++++------- 2 files changed, 163 insertions(+), 96 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 704c1d42228..f871d2e5198 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -6346,6 +6346,27 @@ public boolean isGlobusTransferRequested() { return globusTransferRequested; } + /** + * Analagous with the startDownload method, this method is called when the user + * tries to start a Globus transfer out (~download). The + * validateFilesForDownload call checks to see if there are some files that can + * be Globus transfered and, if so and there are no files that can't be + * transferre, this method will launch the globus transfer app. If there is a + * mix of files or if the guestbook popup is required, the method passes back to + * the UI so those popup(s) can be shown. Once they are, this method is called + * with the popupShown param true and the app will be shown. + * + * @param transferAll - when called from the dataset Access menu, this should be + * true so that all files are included in the processing. + * When it is called from the file table, the current + * selection is used and the param should be false. + * @param popupShown - This method is called twice if the the mixed files or + * guestbook popups are needed. On the first call, popupShown + * is false so that the transfer is not started and those + * popups can be shown. 
On the second call, popupShown is + * true and processing will occur as long as there are some + * valid files to transfer. + */ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { if (transferAll) { this.setSelectedFiles(workingVersion.getFileMetadatas()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 5961b428bcb..ae576134be3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3444,90 +3444,34 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam(" } - @POST - @AuthRequired - @Path("{id}/addGlobusFiles") - @Consumes(MediaType.MULTIPART_FORM_DATA) - public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, - @PathParam("id") String datasetId, - @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo - ) throws IOException, ExecutionException, InterruptedException { - - logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - - if (!systemConfig.isHTTPUpload()) { - return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - - // ------------------------------------- - // (1) Get the user from the API key - // ------------------------------------- - AuthenticatedUser authUser; - try { - authUser = getRequestAuthenticatedUserOrDie(crc); - } catch (WrappedResponse ex) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") - ); - } - - // ------------------------------------- - // (2) Get the Dataset Id - // ------------------------------------- - Dataset dataset; - - try { - dataset = findDatasetOrDie(datasetId); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } - - JsonObject jsonObject = null; - try { - jsonObject = JsonUtil.getJsonObject(jsonData); - } catch (Exception ex) { - logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); - return badRequest("Error parsing json body"); - - } - - //------------------------------------ - // (2b) Make sure dataset does not have package file - // -------------------------------------- - - for (DatasetVersion dv : dataset.getVersions()) { - if (dv.isHasPackageFile()) { - return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") - ); - } - } - - - String lockInfoMessage = "Globus Upload API started "; - DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, - (authUser).getId(), lockInfoMessage); - if (lock != null) { - dataset.addLock(lock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - - - ApiToken token = authSvc.findApiTokenByUser(authUser); - - if(uriInfo != null) { - logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); - } - - - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); - - // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); - - return ok("Async call to Globus Upload started "); - - } +/**************************** + * Globus Support Section: + * + * Globus transfer in (upload) and out (download) involve three basic steps: The + * app is launched and makes a callback to the + * globusUploadParameters/globusDownloadParameters method to get all of the info + * needed to set 
up it's display. + * + * At some point after that, the user will make a selection as to which files to + * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload + * to indicate a transfer is about to start. In addition to providing the + * details of where to transfer the files to/from, Dataverse also grants the + * Globus principal involved the relevant rw or r permission for the dataset. + * + * Once the transfer is started, the app records the task id and sends it to + * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then + * monitors the transfer task and when it ultimately succeeds for fails it + * revokes the principal's permission and, for the transfer in case, adds the + * files to the dataset. (The dataset is locked until the transfer completes.) + * + * (If no transfer is started within a specified timeout, permissions will + * automatically be revoked - see the GlobusServiceBean for details.) + * + * The option to reference a file at a remote endpoint (rather than transfer it) + * follows the first two steps of the process above but completes with a call to + * the normal /addFiles endpoint (as there is no transfer to monitor and the + * files can be added to the dataset immediately.) + */ /** * Retrieve the parameters and signed URLs required to perform a globus @@ -3630,11 +3574,11 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } /** - * Requests permissions for a given globus user to upload to the dataset + * Provides specific storageIdentifiers to use for each file amd requests permissions for a given globus user to upload to the dataset * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred. * @return * @throws IOException * @throws ExecutionException @@ -3721,15 +3665,114 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP } - /** - * Retrieve the parameters and signed URLs required to perform a globus - * transfer/download. This api endpoint is expected to be called as a signed - * callback after the globus-dataverse app/other app is launched, but it will - * accept other forms of authentication. + /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor * * @param crc * @param datasetId + * @param jsonData - see /addFiles documentation, aditional "taskIdentifier" key in the main object is required. 
+ * @param uriInfo + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException */ + @POST + @AuthRequired + @Path("{id}/addGlobusFiles") + @Consumes(MediaType.MULTIPART_FORM_DATA) + public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @FormDataParam("jsonData") String jsonData, + @Context UriInfo uriInfo + ) throws IOException, ExecutionException, InterruptedException { + + logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); + + if (!systemConfig.isHTTPUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); + } + + // ------------------------------------- + // (1) Get the user from the API key + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth") + ); + } + + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } + + //------------------------------------ + // (2b) Make sure dataset does not have package file + // -------------------------------------- + + for (DatasetVersion dv : dataset.getVersions()) { + if (dv.isHasPackageFile()) { + return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile") + ); + } + } + + + String lockInfoMessage = "Globus Upload API started "; + DatasetLock lock = datasetService.addDatasetLock(dataset.getId(), DatasetLock.Reason.GlobusUpload, + (authUser).getId(), lockInfoMessage); + if (lock != null) { + dataset.addLock(lock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + + + ApiToken token = authSvc.findApiTokenByUser(authUser); + + if(uriInfo != null) { + logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); + } + + + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + + // Async Call + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); + + } + +/** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer/download. This api endpoint is expected to be called as a signed + * callback after the globus-dataverse app/other app is launched, but it will + * accept other forms of authentication. + * + * @param crc + * @param datasetId + * @param locale + * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL. + * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools. 
+ */ @GET @AuthRequired @Path("{id}/globusDownloadParameters") @@ -3815,12 +3858,14 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P /** * Requests permissions for a given globus user to download the specified files - * the dataset + * the dataset and returns information about the paths to transfer from. + * + * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer. * * @param crc * @param datasetId - * @param jsonData - * @return + * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required. + * @return - a JSON object containing a map of file ids to Globus endpoint/path * @throws IOException * @throws ExecutionException * @throws InterruptedException @@ -3957,11 +4002,12 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat /** * Monitors a globus download and removes permissions on the dir/dataset when - * done + * the specified transfer task is completed. * * @param crc * @param datasetId - * @param jsonData + * @param jsonData - a JSON Object containing the key "taskIdentifier" with the + * Globus task to monitor. * @return * @throws IOException * @throws ExecutionException From 7697157ac98049dea45a2bd98193aad75e6037e1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 15:27:21 -0500 Subject: [PATCH 315/546] #9464 handle single errors --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 027e58d9263..07e7fe615e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1077,7 +1077,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { accumulatedexceptions = accumulatedexceptions + va; accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); } - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From 8ec61d084a81c7d5786bd583177b80255aa7e883 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 15:58:21 -0500 Subject: [PATCH 316/546] cleanup, add method stubs, open for basestore, info->fine --- .../AbstractRemoteOverlayAccessIO.java | 12 +- .../dataaccess/GlobusAccessibleStore.java | 6 + .../dataaccess/GlobusOverlayAccessIO.java | 142 ++++++++++++------ 3 files changed, 112 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java 
b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 16defc26a4f..8d058b7c9e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -15,11 +15,8 @@ import javax.net.ssl.SSLContext; -import org.apache.http.Header; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpHead; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; @@ -30,15 +27,18 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; -import org.apache.http.protocol.HTTP; import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.util.EntityUtils; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; + +/** + * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes + * common methods for handling auxiliary files in the configured base store. + * @param + */ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index d827e40e807..e4d062f0619 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -6,7 +6,13 @@ public interface GlobusAccessibleStore { + //Whether Dataverse manages access controls for the Globus endpoint or not. 
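+ // When a store is "managed", Dataverse itself creates (and later revokes) the Globus access
+ // rules needed for each transfer; when it is not, the store only references files that live on
+ // a remote, externally controlled Globus endpoint.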
static final String MANAGED = "managed"; + /* + * transfer and reference endpoint formats: + * + * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma + */ static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; static final String GLOBUS_TOKEN = "globus-token"; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index e825af8cf30..7a6809cb2ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -2,12 +2,15 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.globus.AccessToken; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -16,6 +19,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.List; import java.util.logging.Logger; import org.apache.http.client.ClientProtocolException; @@ -32,9 +36,18 @@ /** * @author qqmyers - */ + * + * This class implements three related use cases, all of which leverage the underlying idea of using a base store (as with the Https RemoteOverlay store): + * Managed - where Dataverse has control of the specified Globus endpoint and can set/remove permissions as needed to allow file transfers in/out: + * File/generic endpoint - assumes Dataverse does not have access to the datafile contents + * S3-Connector endpoint - assumes the datafiles are accessible via Globus and via S3 such that Dataverse can access to the datafile contents when needed. + * Remote - where Dataverse references files that remain at remote Globus endpoints (as with the Https RemoteOverlay store) and cannot access to the datafile contents. + * + * Note that Globus endpoints can provide Http URLs to get file contents, so a future enhancement could potentially support datafile contents access in the Managed/File and Remote cases. + * + * */ /* - * Globus Overlay Driver + * Globus Overlay Driver storageIdentifer format: * * Remote: StorageIdentifier format: * ://// @@ -47,11 +60,6 @@ * * Storage location: * /// - * - * transfer and reference endpoint formats: - * - * - * reference endpoints separated by a comma * */ public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { @@ -115,7 +123,6 @@ private String retrieveGlobusAccessToken() { return accessToken.getOtherTokens().get(0).getAccessToken(); } - private void parsePath() { int filenameStart = path.lastIndexOf("/") + 1; String endpointWithBasePath = null; @@ -126,9 +133,9 @@ private void parsePath() { } //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); int pathStart = endpointWithBasePath.indexOf("/"); - logger.info("endpointWithBasePath: " + endpointWithBasePath); + logger.fine("endpointWithBasePath: " + endpointWithBasePath); endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); - logger.info("endpointPath: " + endpointPath); + logger.fine("endpointPath: " + endpointPath); if (isManaged() && (dvObject!=null)) { @@ -146,7 +153,7 @@ private void parsePath() { if (filenameStart > 0) { relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); } - logger.info("relativeDirectoryPath finally: " + relativeDirectoryPath); + logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath); filename = path.substring(filenameStart); endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; @@ -171,7 +178,7 @@ protected void validatePath(String relPath) throws IOException { } else { try { String endpoint = findMatchingEndpoint(relPath, allowedEndpoints); - logger.info(endpoint + " " + relPath); + logger.fine(endpoint + " " + relPath); if (endpoint == null || !Paths.get(endpoint, relPath).normalize().startsWith(endpoint)) { throw new IOException( @@ -189,7 +196,6 @@ protected void validatePath(String relPath) throws IOException { public long retrieveSizeFromMedia() { parsePath(); String globusAccessToken = retrieveGlobusAccessToken(); - logger.info("GAT2: " + globusAccessToken); // Construct Globus URL URI absoluteURI = null; try { @@ -198,13 +204,12 @@ public long retrieveSizeFromMedia() { + "/ls?path=" + endpointPath + relativeDirectoryPath + "&filter=name:" + filename); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); JsonArray dataArray = responseJson.getJsonArray("DATA"); if (dataArray != null && dataArray.size() != 0) { @@ -214,7 +219,7 @@ public long retrieveSizeFromMedia() { } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (URISyntaxException e) { // Should have been caught in validatePath @@ -258,16 +263,15 @@ public void delete() throws IOException { absoluteURI = new URI("https://transfer.api.globusonline.org/v0.10/submission_id"); HttpGet get = new HttpGet(absoluteURI); - logger.info("Token is " + globusAccessToken); get.addHeader("Authorization", "Bearer " + globusAccessToken); CloseableHttpResponse response = getSharedHttpClient().execute(get, localContext); if (response.getStatusLine().getStatusCode() == 200) { // Get reponse as string String responseString = EntityUtils.toString(response.getEntity()); - logger.info("Response from " + get.getURI().toString() + " is: " + responseString); + logger.fine("Response from " + get.getURI().toString() + " is: " + responseString); JsonObject responseJson = JsonUtil.getJsonObject(responseString); String submissionId = responseJson.getString("value"); - logger.info("submission_id for delete is: " + submissionId); + logger.fine("submission_id for delete is: " + submissionId); absoluteURI = new 
URI("https://transfer.api.globusonline.org/v0.10/delete"); HttpPost post = new HttpPost(absoluteURI); JsonObjectBuilder taskJsonBuilder = Json.createObjectBuilder(); @@ -277,30 +281,30 @@ public void delete() throws IOException { post.setHeader("Content-Type", "application/json"); post.addHeader("Authorization", "Bearer " + globusAccessToken); String taskJson= JsonUtil.prettyPrint(taskJsonBuilder.build()); - logger.info("Sending: " + taskJson); + logger.fine("Sending: " + taskJson); post.setEntity(new StringEntity(taskJson, "utf-8")); CloseableHttpResponse postResponse = getSharedHttpClient().execute(post, localContext); int statusCode=postResponse.getStatusLine().getStatusCode(); - logger.info("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); + logger.fine("Response :" + statusCode + ": " +postResponse.getStatusLine().getReasonPhrase()); switch (statusCode) { case 202: // ~Success - delete task was accepted - logger.info("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); + logger.fine("Globus delete initiated: " + EntityUtils.toString(postResponse.getEntity())); break; case 200: // Duplicate - delete task was already accepted - logger.info("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); + logger.warning("Duplicate Globus delete: " + EntityUtils.toString(postResponse.getEntity())); break; default: logger.warning("Response from " + post.getURI().toString() + " was " + postResponse.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(postResponse.getEntity())); + logger.fine(EntityUtils.toString(postResponse.getEntity())); } } else { logger.warning("Response from " + get.getURI().toString() + " was " + response.getStatusLine().getStatusCode()); - logger.info(EntityUtils.toString(response.getEntity())); + logger.fine(EntityUtils.toString(response.getEntity())); } } catch (Exception e) { logger.warning(e.getMessage()); @@ -383,7 +387,7 @@ public String getStorageLocation() throws IOException { */ protected void configureGlobusEndpoints() throws IOException { allowedEndpoints = getAllowedEndpoints(this.driverId); - logger.info("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); + logger.fine("Set allowed endpoints: " + Arrays.toString(allowedEndpoints)); } private static String[] getAllowedEndpoints(String driverId) throws IOException { @@ -409,37 +413,91 @@ private static String[] getAllowedEndpoints(String driverId) throws IOException @Override - public void open(DataAccessOption... option) throws IOException { - // TODO Auto-generated method stub - - } + public void open(DataAccessOption... 
options) throws IOException { + + baseStore.open(options); + + DataAccessRequest req = this.getRequest(); + + if (isWriteAccessRequested(options)) { + isWriteAccess = true; + isReadAccess = false; + } else { + isWriteAccess = false; + isReadAccess = true; + } + + if (dvObject instanceof DataFile) { + String storageIdentifier = dvObject.getStorageIdentifier(); + + DataFile dataFile = this.getDataFile(); + + if (req != null && req.getParameter("noVarHeader") != null) { + baseStore.setNoVarHeader(true); + } + + if (storageIdentifier == null || "".equals(storageIdentifier)) { + throw new FileNotFoundException("Data Access: No local storage identifier defined for this datafile."); + } + + logger.fine("StorageIdentifier is: " + storageIdentifier); + + if (isReadAccess) { + if (dataFile.getFilesize() >= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(retrieveSizeFromMedia()); + } + // Only applies for the S3 Connector case (where we could have run an ingest) + if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } + } @Override public Path getFileSystemPath() throws IOException { - // TODO Auto-generated method stub - return null; + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void savePath(Path fileSystemPath) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - @Override public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { - // TODO Auto-generated method stub - + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); } - + } From 38c120e13d2e1276324b903be58306520168b577 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:21:41 -0500 Subject: [PATCH 317/546] cleanup, delete obsolete methods, change to private, info->fine --- .../harvard/iq/dataverse/api/Datasets.java | 4 +- .../dataverse/globus/GlobusServiceBean.java | 461 +++--------------- .../iq/dataverse/settings/JvmSettings.java | 2 +- src/main/webapp/globus.xhtml | 30 -- 4 files changed, 78 insertions(+), 419 deletions(-) delete mode 100644 src/main/webapp/globus.xhtml diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ae576134be3..cb57acd3b86 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3543,7 +3543,7 @@ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @Pat } else { params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths); } - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); String requestCallName = managed ? "requestGlobusTransferPaths" : "requestGlobusReferencePaths"; allowedApiCalls.add( @@ -3833,7 +3833,7 @@ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @P params.add("endpoint", transferEndpoint); } params.add("files", files); - int timeoutSeconds = JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload") .add(URLTokenUtil.HTTP_METHOD, "POST") diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 0c991424ce9..37959188857 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -97,34 +97,6 @@ public class GlobusServiceBean implements java.io.Serializable { private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private String code; - private String userTransferToken; - private String state; - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUserTransferToken() { - return userTransferToken; - } - - public void setUserTransferToken(String userTransferToken) { - this.userTransferToken = userTransferToken; - } - private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) throws MalformedURLException { @@ -152,33 +124,6 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi return null; } - /* - * public void updatePermision(AccessToken clientTokenUser, String directory, - * String principalType, String perm) throws MalformedURLException { if - * (directory != null && !directory.equals("")) { directory = directory + "/"; } - * logger.info("Start updating permissions." 
+ " Directory is " + directory); - * String globusEndpoint = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - * ArrayList rules = checkPermisions(clientTokenUser, directory, - * globusEndpoint, principalType, null); logger.info("Size of rules " + - * rules.size()); int count = 0; while (count < rules.size()) { - * logger.info("Start removing rules " + rules.get(count)); Permissions - * permissions = new Permissions(); permissions.setDATA_TYPE("access"); - * permissions.setPermissions(perm); permissions.setPath(directory); - * - * Gson gson = new GsonBuilder().create(); URL url = new - * URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/access/" + rules.get(count)); - * logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + - * globusEndpoint + "/access/" + rules.get(count)); MakeRequestResponse result = - * makeRequest(url, "Bearer", - * clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", - * gson.toJson(permissions)); if (result.status != 200) { - * logger.warning("Cannot update access rule " + rules.get(count)); } else { - * logger.info("Access rule " + rules.get(count) + " was updated"); } count++; } - * } - */ - /** * Call to delete a globus rule related to the specified dataset. * @@ -214,6 +159,13 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } + /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + * + * @param principal - the id of the Globus principal doing the transfer + * @param dataset + * @param numberOfPaths - how many files are to be transferred + * @return + */ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { GlobusEndpoint endpoint = getGlobusEndpoint(dataset); @@ -278,6 +230,12 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } + /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + * + * @param dataset + * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @return - a map of supplied paths to valid storage identifiers + */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { String driverId = dataset.getEffectiveStorageDriverId(); JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); @@ -304,39 +262,38 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } + + /** A cache of temporary permission requests - for upload (rw) and download (r) access. + * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. + * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) 
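+ *
+ * (Implementation note: the cache below relies on Caffeine's expireAfterWrite together with an
+ * evictionListener, so the expiry of an entry is what triggers deletePermission for that rule.)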
+ */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire - logger.info("Rule " + ruleId + " expired"); + logger.fine("Rule " + ruleId + " expired"); Dataset dataset = datasetSvc.find(datasetId); deletePermission((String) ruleId, dataset, logger); }) .build(); + //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { - logger.info("Adding rule " + ruleId + " for dataset " + datasetId); + logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId - + "/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - - if (result.status == 200) { - logger.info(" SUCCESS ====== "); - return true; - } - return false; - } - +/** Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -356,6 +313,11 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } + /** Globus call to get an access token for the user using the long-term token we hold. 
+ * + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @return - a valid Globus access token + */ public static AccessToken getClientToken(String globusBasicToken) { URL url; AccessToken clientTokenUser = null; @@ -375,36 +337,7 @@ public static AccessToken getClientToken(String globusBasicToken) { return clientTokenUser; } - public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) - throws UnsupportedEncodingException, MalformedURLException { - String serverName = origRequest.getServerName(); - if (serverName.equals("localhost")) { - logger.severe("Changing localhost to utoronto"); - serverName = "utl-192-123.library.utoronto.ca"; - } - - String redirectURL = "https://" + serverName + "/globus.xhtml"; - - redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); - - URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL - + "&grant_type=authorization_code"); - logger.info(url.toString()); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); - AccessToken accessTokenUser = null; - - if (result.status == 200) { - logger.info("Access Token: \n" + result.toString()); - accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - logger.info(accessTokenUser.getAccessToken()); - } - - return accessTokenUser; - - } - - public static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -412,9 +345,8 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a try { connection = (HttpURLConnection) url.openConnection(); // Basic - // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 logger.info(authType + " " + authCode); - logger.info("For URL: " + url.toString()); + logger.fine("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -422,7 +354,7 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); connection.setRequestProperty("Accept", "application/json"); - logger.info(jsonString); + logger.fine(jsonString); connection.setDoOutput(true); OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); @@ -431,24 +363,21 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a } status = connection.getResponseCode(); - logger.info("Status now " + status); + logger.fine("Status now " + status); InputStream result = connection.getInputStream(); if (result != null) { - logger.info("Result is not null"); str = readResultJson(result).toString(); - logger.info("str is "); - logger.info(result.toString()); + logger.fine("str is " + result.toString()); } else { - logger.info("Result is null"); + logger.fine("Result is null"); str = null; } - logger.info("status: " + status); + logger.fine("status: " + status); } catch (IOException ex) { - logger.info("IO"); logger.severe(ex.getMessage()); - logger.info(ex.getCause().toString()); - logger.info(ex.getStackTrace().toString()); + logger.fine(ex.getCause().toString()); + 
logger.fine(ex.getStackTrace().toString()); } finally { if (connection != null) { connection.disconnect(); @@ -461,16 +390,14 @@ public static MakeRequestResponse makeRequest(URL url, String authType, String a private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line + "\n"); } br.close(); - logger.info(sb.toString()); + logger.fine(sb.toString()); } catch (IOException e) { sb = null; logger.severe(e.getMessage()); @@ -495,31 +422,6 @@ private static T parseJson(String sb, Class jsonParserClass, boolean nami } } - public String getDirectory(String datasetId) { - Dataset dataset = null; - String directory = null; - try { - dataset = datasetSvc.find(Long.parseLong(datasetId)); - if (dataset == null) { - logger.severe("Dataset not found " + datasetId); - return null; - } - String storeId = dataset.getStorageIdentifier(); - storeId.substring(storeId.indexOf("//") + 1); - directory = storeId.substring(storeId.indexOf("//") + 1); - logger.info(storeId); - logger.info(directory); - logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); - return directory; - - } catch (NumberFormatException nfe) { - logger.severe(nfe.getMessage()); - - return null; - } - - } - static class MakeRequestResponse { public String jsonResponse; public int status; @@ -531,53 +433,26 @@ static class MakeRequestResponse { } - /* - * unused - may be needed for S3 case private MakeRequestResponse - * findDirectory(String directory, String clientToken, String globusEndpoint) - * throws MalformedURLException { URL url = new - * URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint - * + "/ls?path=" + directory + "/"); - * - * MakeRequestResponse result = makeRequest(url, "Bearer", clientToken, "GET", - * null); logger.info("find directory status:" + result.status); - * - * return result; } - */ - /* - * public boolean giveGlobusPublicPermissions(Dataset dataset) throws - * UnsupportedEncodingException, MalformedURLException { - * - * GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - * - * - * MakeRequestResponse status = findDirectory(endpoint.getBasePath(), - * endpoint.getClientToken(), endpoint.getId()); - * - * if (status.status == 200) { - * - * int perStatus = givePermission("all_authenticated_users", "", "r", dataset); - * logger.info("givePermission status " + perStatus); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached"); } else if - * (perStatus == 400) { logger.info("No directory in Globus"); } else if - * (perStatus != 201 && perStatus != 200) { - * logger.info("Cannot give read permission"); return false; } + /** + * Cache of open download Requests This cache keeps track of the set of files + * selected for transfer out (download) via Globus. It is a means of + * transferring the list from the DatasetPage, where it is generated via user UI + * actions, and the Datasets/globusDownloadParameters API. * - * } else if (status.status == 404) { - * logger.info("There is no globus directory"); } else { - * logger.severe("Cannot find directory in globus, status " + status); return - * false; } + * Nominally, the dataverse-globus app will call that API endpoint and then + * /requestGlobusDownload, at which point the cached info is sent to the app. 
If + * the app doesn't call within 5 minutes (the time allowed to call + * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period + * giving the user time to make choices in the app), the cached info is deleted. * - * return true; } */ - - // Single cache of open rules/permission requests private final Cache downloadCache = Caffeine.newBuilder() .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_RULES_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> { // Delete downloads that expire - logger.info("Download for " + downloadId + " expired"); + logger.fine("Download for " + downloadId + " expired"); }) .build(); @@ -600,11 +475,18 @@ public int setPermissionForDownload(Dataset dataset, String principal) { return requestPermission(endpoint, dataset, permissions); } - // Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app for upload public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } + /** Generated the App URl for upload (in) or download (out) + * + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download + * @param dataFiles - a list of the DataFiles to be downloaded + * @return + */ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; @@ -654,7 +536,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles, Dataset d) { return filesBuilder.build(); } - public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { + private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); } @@ -718,7 +600,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S GlobusEndpoint endpoint = getGlobusEndpoint(dataset); GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); - logger.info("Found rule: " + ruleId); + logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); if (datasetId != null) { @@ -812,8 +694,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.info("Size: " + newfilesJsonArray.size()); - logger.info("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -1227,198 +1109,8 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } - /* - * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) - * throws MalformedURLException { - * - * logger.info("=====Tasklist == dataset id :" + 
dataset.getId()); String - * directory = null; - * - * try { - * - * List fileMetadatas = new ArrayList<>(); - * - * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - * - * - * - * DatasetVersion workingVersion = dataset.getEditVersion(); - * - * if (workingVersion.getCreateTime() != null) { - * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - * - * directory = dataset.getAuthorityForFileStorage() + "/" + - * dataset.getIdentifierForFileStorage(); - * - * System.out.println("======= directory ==== " + directory + - * " ==== datasetId :" + dataset.getId()); Map checksumMapOld - * = new HashMap<>(); - * - * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - * - * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() - * != null && fm.getDataFile().getId() != null) { String chksum = - * fm.getDataFile().getChecksumValue(); if (chksum != null) { - * checksumMapOld.put(chksum, 1); } } } - * - * List dFileList = new ArrayList<>(); boolean update = false; for - * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - * - * String s3ObjectKey = s3ObjectSummary.getKey(); - * - * - * String t = s3ObjectKey.replace(directory, ""); - * - * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String - * filePath = s3ObjectKey; String fileName = - * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = - * datasetSIO.getStorageLocation() + "/" + fileName; - * - * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = - * DataAccess.getDirectStorageIO(fullPath); InputStream in = - * dataFileStorageIO.getInputStream(); - * - * String checksumVal = FileUtil.calculateChecksum(in, - * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - * logger.info("The checksum is " + checksumVal); if - * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + - * dataset.getId() + "======= filename ==== " + filePath + - * " == file already exists "); } else if (filePath.contains("cached") || - * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else - * { update = true; logger.info("datasetId :" + dataset.getId() + - * "======= filename ==== " + filePath + " == new file "); try { - * - * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); - * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new - * Date().getTime())); datafile.setCreateDate(new Timestamp(new - * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new - * Date().getTime())); - * - * FileMetadata fmd = new FileMetadata(); - * - * - * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, - * "").replace(File.separator + fileName, "")); - * - * fmd.setDataFile(datafile); - * - * datafile.getFileMetadatas().add(fmd); - * - * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " - * + filePath + " == added to datafile, filemetadata "); - * - * try { // We persist "SHA1" rather than "SHA-1". 
- * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - * datafile.setChecksumType(DataFile.ChecksumType.MD5); - * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { - * logger.info("==== datasetId :" + dataset.getId() + - * "======Could not calculate checksumType signature for the new file "); } - * - * datafile.setFilesize(totalSize); - * - * dFileList.add(datafile); - * - * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + - * "======Failed to process and/or save the file " + ioex.getMessage()); return - * false; - * - * } } } } if (update) { - * - * List filesAdded = new ArrayList<>(); - * - * if (dFileList != null && dFileList.size() > 0) { - * - * // Dataset dataset = version.getDataset(); - * - * for (DataFile dataFile : dFileList) { - * - * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); - * - * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - * dataFile.getFileMetadata().setDatasetVersion(workingVersion); - * dataset.getFiles().add(dataFile); - * - * } - * - * filesAdded.add(dataFile); - * - * } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ===== Done! Finished saving new files to the dataset."); } - * - * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { - * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; - * - * if (workingVersion.isDraft()) { - * - * logger.info("Async: ==== datasetId :" + dataset.getId() + - * " ==== inside draft version "); - * - * Timestamp updateTime = new Timestamp(new Date().getTime()); - * - * workingVersion.setLastUpdateTime(updateTime); - * dataset.setModificationTime(updateTime); - * - * - * for (FileMetadata fileMetadata : fileMetadatas) { - * - * if (fileMetadata.getDataFile().getCreateDate() == null) { - * fileMetadata.getDataFile().setCreateDate(updateTime); - * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } - * fileMetadata.getDataFile().setModificationTime(updateTime); } - * - * - * } else { logger.info("datasetId :" + dataset.getId() + - * " ==== inside released version "); - * - * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for - * (FileMetadata fileMetadata : fileMetadatas) { if - * (fileMetadata.getDataFile().getStorageIdentifier() != null) { - * - * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. 
- * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } - * - * - * } - * - * - * try { Command cmd; logger.info("Async: ==== datasetId :" + - * dataset.getId() + - * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new - * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, - * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) - * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, - * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() - * commandEngine.submit(cmd); } catch (CommandException ex) { - * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + - * "======CommandException updating DatasetVersion from batch job: " + - * ex.getMessage()); return false; } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - * - * //return true; } - * - * } catch (Exception e) { String message = e.getMessage(); - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL Exception ============== " + message); - * e.printStackTrace(); return false; //return - * error(Response.Status.INTERNAL_SERVER_ERROR, - * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. Message was '" - * + message + "'."); } - * - * String globusBasicToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - * AccessToken clientTokenUser = getClientToken(globusBasicToken); - * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } - * - */ - GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { Dataset dataset = null; if (dvObject instanceof Dataset) { dataset = (Dataset) dvObject; @@ -1435,8 +1127,6 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { if (GlobusAccessibleStore.isDataverseManaged(driverId) && (dataset != null)) { directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage(); - logger.info("directoryPath now: " + directoryPath); - } else { // remote store - may have path in file storageidentifier String relPath = dvObject.getStorageIdentifier() @@ -1446,17 +1136,16 @@ GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { directoryPath = directoryPath + relPath.substring(0, filenameStart); } } - logger.info("directoryPath finally: " + directoryPath); + logger.fine("directoryPath finally: " + directoryPath); String endpointId = GlobusAccessibleStore.getTransferEndpointId(driverId); - logger.info("endpointId: " + endpointId); + logger.fine("endpointId: " + endpointId); String globusToken = GlobusAccessibleStore.getGlobusToken(driverId); AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); - logger.info("clientToken: " + clientToken); endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); return endpoint; @@ -1484,7 +1173,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, DataFile df = guestbookResponse.getDataFile(); if (df != null) { - logger.info("Single datafile case for writeGuestbookAndStartTransfer"); + logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); List downloadDFList = new ArrayList(1); downloadDFList.add(df); if 
(!doNotSaveGuestbookResponse) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index c9038047611..96a56d09c0b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,7 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_RULES_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), FILES(SCOPE_FILES), BASE_URL(FILES, "base-url"), GLOBUS_TOKEN(FILES, "globus-token"), diff --git a/src/main/webapp/globus.xhtml b/src/main/webapp/globus.xhtml deleted file mode 100644 index f4eebd4babf..00000000000 --- a/src/main/webapp/globus.xhtml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - - - - From caa6e684390bb4c36dff45f1de94837f8b632f57 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 4 Dec 2023 18:29:22 -0500 Subject: [PATCH 318/546] revert unrelated changes, old settings --- .../harvest/server/web/servlet/OAIServlet.java | 15 ++++++++++----- .../iq/dataverse/settings/JvmSettings.java | 5 +---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 19901cae796..96a19acc0e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -73,13 +73,18 @@ public class OAIServlet extends HttpServlet { @EJB SystemConfig systemConfig; + + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") + private Integer maxListIdentifiers; - //Todo - revert this change - added to get past some local compile issues - private Integer maxListIdentifiers=100; - - private Integer maxListSets=100; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxsets", defaultValue="100") + private Integer maxListSets; - private Integer maxListRecords=10; + @Inject + @ConfigProperty(name = "dataverse.oai.server.maxrecords", defaultValue="10") + private Integer maxListRecords; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.web.servlet.OAIServlet"); // If we are going to stick with this solution - of providing a minimalist diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 96a56d09c0b..fb85ae9adab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,10 +50,7 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), - GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-rules-cache-maxage"), - FILES(SCOPE_FILES), - BASE_URL(FILES, "base-url"), - GLOBUS_TOKEN(FILES, "globus-token"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), From 3babc5aac25710dcc92a90ae861a7b21eef43742 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 4 Dec 2023 20:35:56 -0500 Subject: [PATCH 319/546] 
moving the StorageUse member to DvObjectContainer from DvObject; moving the em.merge()/em.persist() to the djb. #8549 --- .../java/edu/harvard/iq/dataverse/DataFile.java | 17 ----------------- .../iq/dataverse/DataverseServiceBean.java | 17 ++++++++++++++++- .../java/edu/harvard/iq/dataverse/DvObject.java | 14 -------------- .../harvard/iq/dataverse/DvObjectContainer.java | 14 ++++++++++++-- .../command/impl/SetCollectionQuotaCommand.java | 15 +-------------- .../storageuse/StorageUseServiceBean.java | 1 - 6 files changed, 29 insertions(+), 49 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 2770118d41b..3d8086b142b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -641,23 +641,6 @@ public String getFriendlySize() { } } - /** - * Experimental - record the pre-calculated "storage size" of the file, and - * all its associated auxiliary file objects: - - @Column(nullable = true) - private Long storageSize; - - - public Long getStorageSize() { - return storageSize; - } - - public void setStorageSize(Long storageSize) { - this.storageSize = storageSize; - } - * */ - public boolean isRestricted() { return restricted; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 549b8310122..487215c7a65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,6 +18,7 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; @@ -919,5 +920,19 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } - + public void saveStorageQuota(Dataverse target, Long allocation) { + StorageQuota storageQuota = target.getStorageQuota(); + + if (storageQuota != null) { + storageQuota.setAllocation(allocation); + em.merge(storageQuota); + } else { + storageQuota = new StorageQuota(); + storageQuota.setDefinitionPoint(target); + storageQuota.setAllocation(allocation); + target.setStorageQuota(storageQuota); + em.persist(storageQuota); + } + em.flush(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 515d9f9f153..df249e04663 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; -import edu.harvard.iq.dataverse.storageuse.StorageUse; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -182,10 +181,6 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments; - /** - * Should only be used in constructors for DvObjectContainers (Datasets and - * Collections), to make sure new entries are created and persisted in the - * database StorageUse table for every DvObject container we create. 
- * @param storageUse - */ - public void setStorageUse(StorageUse storageUse) { - this.storageUse = storageUse; - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index 2f391e394fa..82057315fbb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -2,11 +2,9 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.persistence.CascadeType; -import java.util.Locale; import java.util.Optional; import jakarta.persistence.MappedSuperclass; @@ -45,6 +43,9 @@ public boolean isEffectivelyPermissionRoot() { private Boolean guestbookAtRequest = null; + @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true) + private StorageUse storageUse; + public String getEffectiveStorageDriverId() { String id = storageDriver; if (StringUtils.isBlank(id)) { @@ -165,4 +166,13 @@ public void setCurationLabelSetName(String setName) { this.externalLabelSetName = setName; } + /** + * Should only be used in constructors for DvObjectContainers (Datasets and + * Collections), to make sure new entries are created and persisted in the + * database StorageUse table for every DvObject container we create. + * @param storageUse + */ + public void setStorageUse(StorageUse storageUse) { + this.storageUse = storageUse; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java index cf8fb6fd42e..e52c47a5e7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.logging.Logger; @@ -49,18 +48,6 @@ public void executeImpl(CommandContext ctxt) throws CommandException { throw new IllegalCommandException("Must specify valid allocation in bytes", this); } - StorageQuota storageQuota = dataverse.getStorageQuota(); - - if (storageQuota != null) { - storageQuota.setAllocation(allocation); - ctxt.em().merge(storageQuota); - } else { - storageQuota = new StorageQuota(); - storageQuota.setDefinitionPoint(dataverse); - storageQuota.setAllocation(allocation); - dataverse.setStorageQuota(storageQuota); - ctxt.em().persist(storageQuota); - } - ctxt.em().flush(); + ctxt.dataverses().saveStorageQuota(dataverse, allocation); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index 18e4ef49640..fbaaff22dee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -1,6 +1,5 @@ package 
edu.harvard.iq.dataverse.storageuse; -import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.settings.JvmSettings; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; From dfa2dc3853254bc8c58bedbfd288a63bcfa07b32 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 04:38:46 -0500 Subject: [PATCH 320/546] remove adaptation for quotas PR that was itself changed --- .../impl/CreateNewDataFilesCommand.java | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 269ba47643b..0470f59b861 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -3,20 +3,18 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -85,7 +83,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand sio; - try { - sio = DataAccess.getDirectStorageIO(DataAccess.getLocationFromStorageId(newStorageIdentifier, version.getDataset())); - - // get file size - // Note - some stores (e.g. AWS S3) only offer eventual consistency and a call - // to get the size immediately after uploading may fail. As of the addition of - // PR#9409 adding storage quotas, we are now requiring size to be available - // earlier. If this is seen, adding - // a delay/retry may help - newFileSize = sio.retrieveSizeFromMedia(); - } catch (IOException e) { - // If we don't get a file size, a CommandExecutionException will be thrown later in the code - e.printStackTrace(); - } - } } // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just From c78613e60ca7a2442753d6382b0ace3c7fd07316 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 08:42:23 -0500 Subject: [PATCH 321/546] one more refinement for the flyway script. 
#8549 --- .../storageuse/StorageUseServiceBean.java | 33 ++++++++++--------- .../V6.0.0.5__8549-collection-quotas.sql | 13 ++++++++ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java index fbaaff22dee..7aea7a7b596 100644 --- a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -46,23 +46,24 @@ public Long findStorageSizeByDvContainerId(Long dvObjectId) { */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { - //@todo should throw exceptions if either parameter is null - Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); - if (!(allow.isPresent() && allow.get())) { - String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" - + "(" - + " SELECT id, owner_id\n" - + " FROM dvobject\n" - + " WHERE id=" + dvObjectContainerId + "\n" - + " UNION ALL\n" - + " SELECT dvobject.id, dvobject.owner_id\n" - + " FROM dvobject\n" - + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" - + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" - + "FROM uptree\n" - + "WHERE dvobjectcontainer_id = uptree.id;"; + if (dvObjectContainerId != null && increment != null) { + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; - int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } } // @todo throw an exception if the number of parent dvobjects updated by // the query is < 2 - ? diff --git a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql index 3657642c267..d6c067056ec 100644 --- a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql +++ b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql @@ -38,6 +38,19 @@ AND fileobject.id = file.id AND dt.datafile_id = file.id GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; +-- there may also be some auxiliary files registered in the database, such as +-- the content generated and deposited by external tools - diff. privacy stats +-- being one of the example. These are also considered the "payload" files that +-- we want to count for the purposes of calculating storage use. 
+UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(aux.fileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, auxiliaryFile aux +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND aux.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + + -- ... and then we can repeat the same for collections, by setting the storage size -- to the sum of the storage sizes of the datasets *directly* in each collection: -- (no attemp is made yet to recursively count the sizes all the chilld sub-collections) From 0c02b15aab711acbfb7f2c957c4482313b3997b9 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 5 Dec 2023 09:50:33 -0500 Subject: [PATCH 322/546] try QDR /logo endpoint --- .../edu/harvard/iq/dataverse/api/Datasets.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index af6059cf882..828ba218cc4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1971,6 +1971,22 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) { } } + @GET + @Produces({ "image/png" }) + @Path("{id}/logo") + public Response getDatasetLogo(@PathParam("id") String idSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + InputStream is = DatasetUtil.getLogoAsInputStream(dataset); + if (is == null) { + return notFound("Logo not available"); + } + return Response.ok(is).build(); + } catch (WrappedResponse wr) { + return notFound("Logo not available"); + } + } + // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers. 
@POST @AuthRequired From 8c9f1242d53aea5ecc906bd4a2a3f5d12a884224 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:13:53 -0500 Subject: [PATCH 323/546] switch minio to creds jenkins expects #6783 --- docker-compose-dev.yml | 9 ++++----- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6bc50f7e764..98376e255dd 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -41,8 +41,8 @@ services: -Ddataverse.files.minio1.path-style-access=true -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=minioadmin - -Ddataverse.files.minio1.secret-key=minioadmin + -Ddataverse.files.minio1.access-key=4cc355_k3y + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -211,9 +211,8 @@ services: volumes: - minio_storage:/data environment: - # these are the defaults but are here for clarity - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin + MINIO_ROOT_USER: 4cc355_k3y + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f5e4ce6a794..daf04bb3d14 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,8 +54,8 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyLocalStack, secretKeyLocalStack))) .withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); - String accessKeyMinio = "minioadmin"; - String secretKeyMinio = "minioadmin"; + String accessKeyMinio = "4cc355_k3y"; + String secretKeyMinio = "s3cr3t_4cc355_k35"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From 6a7d8d1c6f76c8e54f9759f643204aa339c5bdd0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:33:19 -0500 Subject: [PATCH 324/546] make assertions about users #6783 --- .../java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index daf04bb3d14..7c1531cbfaf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -99,9 +99,10 @@ public void testNonDirectUpload() { String driverLabel = "MinIO"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
@@ -118,6 +119,7 @@ public void testNonDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); @@ -208,9 +210,10 @@ public void testDirectUpload() { String driverId = "localstack1"; String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); - UtilIT.makeSuperUser(superusername); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); Response storageDrivers = listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? @@ -227,6 +230,7 @@ public void testDirectUpload() { //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); From b9f48913e498ec96ef8f5994c21e7bb549e747e0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 10:41:45 -0500 Subject: [PATCH 325/546] move methods to UtilIT #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 75 +++---------------- .../edu/harvard/iq/dataverse/api/UtilIT.java | 50 +++++++++++++ 2 files changed, 62 insertions(+), 63 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7c1531cbfaf..1306c30d9c1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -103,7 +103,7 @@ public void testNonDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? 
String drivers = """ @@ -127,18 +127,18 @@ public void testNonDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -214,7 +214,7 @@ public void testDirectUpload() { String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); String superusername = UtilIT.getUsernameFromResponse(createSuperuser); UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); - Response storageDrivers = listStorageDrivers(superuserApiToken); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); storageDrivers.prettyPrint(); // TODO where is "Local/local" coming from? String drivers = """ @@ -238,18 +238,18 @@ public void testDirectUpload() { createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); originalStorageDriver.prettyPrint(); originalStorageDriver.then().assertThat() .body("data.message", equalTo("undefined")) .statusCode(200); - Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); setStorageDriverToS3.prettyPrint(); setStorageDriverToS3.then().assertThat() .statusCode(200); - Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken); + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); updatedStorageDriver.prettyPrint(); updatedStorageDriver.then().assertThat() .statusCode(200); @@ -275,7 +275,7 @@ public void testDirectUpload() { // // String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id"); long size = 1000000000l; - Response getUploadUrls = getUploadUrls(datasetPid, size, apiToken); + Response getUploadUrls = UtilIT.getUploadUrls(datasetPid, size, apiToken); getUploadUrls.prettyPrint(); getUploadUrls.then().assertThat().statusCode(200); @@ -298,7 +298,7 @@ public void testDirectUpload() { String contentsOfFile = "foobar"; InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); - Response uploadFileDirect = uploadFileDirect(localhostUrl, inputStream); + Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); /* Direct upload to MinIO is failing with errors like this: @@ -357,7 +357,7 @@ 
public void testDirectUpload() { assertEquals(contentsOfFile, s3Object); System.out.println("direct download..."); - Response getHeaders = downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); + Response getHeaders = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); for (Header header : getHeaders.getHeaders()) { System.out.println("direct download header: " + header); } @@ -371,7 +371,7 @@ public void testDirectUpload() { } catch (UnsupportedEncodingException ex) { } - Response downloadFile = downloadFromUrl(decodedDownloadUrl); + Response downloadFile = UtilIT.downloadFromUrl(decodedDownloadUrl); downloadFile.prettyPrint(); downloadFile.then().assertThat().statusCode(200); @@ -394,55 +394,4 @@ public void testDirectUpload() { } - //TODO: move these into UtilIT. They are here for now to avoid merge conflicts - static Response listStorageDrivers(String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/storageDrivers"); - } - - static Response getStorageDriver(String dvAlias, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response setStorageDriver(String dvAlias, String label, String apiToken) { - return given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken) - .body(label) - .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); - } - - static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { - String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. - String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. - if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { - idInPath = ":persistentId"; - optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; - } - RequestSpecification requestSpecification = given(); - if (apiToken != null) { - requestSpecification = given() - .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); - } - return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); - } - - static Response uploadFileDirect(String url, InputStream inputStream) { - return given() - .header("x-amz-tagging", "dv-state=temp") - .body(inputStream) - .put(url); - } - - static Response downloadFileNoRedirect(Integer fileId, String apiToken) { - return given().when().redirects().follow(false) - .get("/api/access/datafile/" + fileId + "?key=" + apiToken); - } - - static Response downloadFromUrl(String url) { - return given().get(url); - } - } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 9b264086c27..12bb069424f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2361,6 +2361,56 @@ static Response deleteStorageSite(long storageSiteId) { .delete("/api/admin/storageSites/" + storageSiteId); } + static Response listStorageDrivers(String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/storageDrivers"); + } + + static Response getStorageDriver(String dvAlias, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response setStorageDriver(String dvAlias, String label, String apiToken) { + return given() + 
.header(API_TOKEN_HTTP_HEADER, apiToken) + .body(label) + .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); + } + + static Response uploadFileDirect(String url, InputStream inputStream) { + return given() + .header("x-amz-tagging", "dv-state=temp") + .body(inputStream) + .put(url); + } + + static Response downloadFileNoRedirect(Integer fileId, String apiToken) { + return given().when().redirects().follow(false) + .get("/api/access/datafile/" + fileId + "?key=" + apiToken); + } + + static Response downloadFromUrl(String url) { + return given().get(url); + } + static Response metricsDataversesToMonth(String yyyymm, String queryParams) { String optionalYyyyMm = ""; if (yyyymm != null) { From 7349ed9f754e05ff7b16a24ea8f3c24c060ed593 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 10:43:38 -0500 Subject: [PATCH 326/546] get logo, picking 48px size for datafile thumbs FWIW: QDR generates a 400px version here and then uses styling to fit the page. Not sure what the motivation for that was without digging. --- .../iq/dataverse/dataset/DatasetUtil.java | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 096f1f87acc..ccf861ebdc8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -411,6 +411,69 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { return nonDefaultDatasetThumbnail; } } + + public static InputStream getLogoAsInputStream(Dataset dataset) { + if (dataset == null) { + return null; + } + StorageIO dataAccess = null; + + try { + dataAccess = DataAccess.getStorageIO(dataset); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + } + + InputStream in = null; + try { + if (dataAccess == null) { + logger.warning( + "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()); + } else { + in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal); + } + } catch (IOException ex) { + logger.fine( + "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail."); + } + + if (in == null) { + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + if (dataset.isUseGenericThumbnail()) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'."); + return null; + } else { + thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + 
logger.fine("Dataset (id :" + dataset.getId() + + ") does not have a logo available that could be selected automatically."); + return null; + } else { + + } + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + + try { + in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), + ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE).getInputStream(); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + ioex.printStackTrace(); + } + + } + return in; + } /** * The dataset logo is the file that a user uploads which is *not* one of From 6f1cd087624fea70a1c37425aacaf05c9d7ba0bf Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 5 Dec 2023 15:53:21 +0000 Subject: [PATCH 327/546] Added: checks before calling getFileMetadatas on canDownloadAtLeastOneFile method in PermissionServiceBean --- .../iq/dataverse/PermissionServiceBean.java | 51 ++++++++++++++++++- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 9e6628617ce..2e4627576c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -41,6 +41,9 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import jakarta.persistence.Query; +import jakarta.persistence.criteria.CriteriaBuilder; +import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.Root; /** * Your one-stop-shop for deciding which user can do what action on which @@ -837,12 +840,56 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } - public boolean canDownloadAtLeastOneFile(User requestUser, DatasetVersion datasetVersion) { + /** + * Checks if a User can download at least one file of the target DatasetVersion. + * + * @param user User to check + * @param datasetVersion DatasetVersion to check + * @return boolean indicating whether the user can download at least one file or not + */ + public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { + if (user.isSuperuser()) { + return true; + } + if (hasReleasedFiles(datasetVersion)) { + return true; + } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(requestUser, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { return true; } } return false; } + + /** + * Checks if a DatasetVersion has released files. + * + * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing + * the search on a particular file, it searches for the total number of files in the target version that are present + * in the released version. 
+ * + * @param targetDatasetVersion DatasetVersion to check + * @return boolean indicating whether the dataset version has released files or not + */ + private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + Dataset targetDataset = targetDatasetVersion.getDataset(); + if (!targetDataset.isReleased()) { + return false; + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false), + criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset), + criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED), + fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return result > 0; + } } From c194d74b2029917de050fe5d40b237b23bddf3ab Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 10:59:46 -0500 Subject: [PATCH 328/546] Clarified the sentence about the initial deployment in the release note. #8549 --- doc/release-notes/8549-collection-quotas.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md index 29b84213cfb..b3635d0c5a1 100644 --- a/doc/release-notes/8549-collection-quotas.md +++ b/doc/release-notes/8549-collection-quotas.md @@ -1,3 +1,3 @@ This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the deployment. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 From cf7e664e626994419ca3a1c80785290da7efe683 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 12:02:41 -0500 Subject: [PATCH 329/546] moved the entitymanager calls from a command to the service #8549 --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 8 ++++++++ .../engine/command/impl/DeleteCollectionQuotaCommand.java | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 487215c7a65..b6e666e8058 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -935,4 +935,12 @@ public void saveStorageQuota(Dataverse target, Long allocation) { } em.flush(); } + + public void disableStorageQuota(StorageQuota storageQuota) { + if (storageQuota != null && storageQuota.getAllocation() != null) { + storageQuota.setAllocation(null); + em.merge(storageQuota); + em.flush(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java index 4015228366b..c0f863686da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -46,9 +46,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { StorageQuota storageQuota = targetDataverse.getStorageQuota(); if (storageQuota != null && storageQuota.getAllocation() != null) { - storageQuota.setAllocation(null); - ctxt.em().merge(storageQuota); - ctxt.em().flush(); + ctxt.dataverses().disableStorageQuota(storageQuota); } // ... and if no quota was enabled on the collection - nothing to do = success } From 6a4a9ab3d625f1e5835b3e119449f8fd88eaee23 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 5 Dec 2023 12:10:48 -0500 Subject: [PATCH 330/546] stub out diagnosing jenkins failures #10101 --- doc/sphinx-guides/source/qa/jenkins.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md index a4ca4d8688f..9259284beb9 100644 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ b/doc/sphinx-guides/source/qa/jenkins.md @@ -42,3 +42,18 @@ How can you know if API tests are passing? Here are the steps, by way of example - Click "Test Result". - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. + +## Diagnosing Failures + +API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. + +If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. 
+ +Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: + +``` +TASK [dataverse : download payara zip] ***************************************** +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +``` + +In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! From dfa49c3720f866f36df0b6cd712f1c5144dfee44 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 14:31:32 -0500 Subject: [PATCH 331/546] rename flyway script --- ...thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.5__9506-track-thumb-failures.sql => V6.0.0.6__9506-track-thumb-failures.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.5__9506-track-thumb-failures.sql rename to src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql From 70a3442cc9a6c672ef8a553be8b279b3b8ea1b52 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 5 Dec 2023 14:36:21 -0500 Subject: [PATCH 332/546] updated aux. file service bean #8549 --- .../dataverse/AuxiliaryFileServiceBean.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 8c96f98ce39..363622ba3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,6 +2,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @EJB private SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } dataFile.getAuxiliaryFiles().add(auxFile); } + // We've just added this file to storage; increment the StorageUse + // record if needed. 
+ if (auxFile.getFileSize() != null + && auxFile.getFileSize() > 0 + && dataFile.getOwner() != null ) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize()); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (af == null) { throw new FileNotFoundException(); } + Long auxFileSize = af.getFileSize(); em.remove(af); StorageIO storageIO; storageIO = dataFile.getStorageIO(); @@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (storageIO.isAuxObjectCached(auxExtension)) { storageIO.deleteAuxObject(auxExtension); } + // We've just deleted this file from storage; update the StorageUse + // record if needed. + if (auxFileSize != null + && auxFileSize > 0 + && dataFile.getOwner() != null) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize)); + } + } public List findAuxiliaryFiles(DataFile dataFile) { From c54a85fca9377b74efc0e74e8a70a6de2f6fccc4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 14:52:23 -0500 Subject: [PATCH 333/546] #9464 add caveats to release note. --- doc/release-notes/9464-json-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md index 4b08f2ca9dd..f104263ba35 100644 --- a/doc/release-notes/9464-json-validation.md +++ b/doc/release-notes/9464-json-validation.md @@ -1,3 +1,3 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 2379828c2737260901b23020a436f5cab6cc962a Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 15:05:12 -0500 Subject: [PATCH 334/546] Update native-api.rst --- doc/sphinx-guides/source/api/native-api.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 2d37c3b07ae..29aa7c880ac 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -510,7 +510,9 @@ The fully expanded example above (without environment variables) looks like this Retrieve a Dataset JSON Schema for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This +first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled +vocabulary and more robust dataset field format testing: .. code-block:: bash @@ -535,7 +537,8 @@ While it is recommended to download a copy of the JSON Schema from the collectio Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a dataset JSON file customized for a given collection prior to creating the dataset: +Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting +and the presence of required elements: .. code-block:: bash From dd2d9726e3125975493fa6dbf70578d76fa5f07c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:47:04 -0500 Subject: [PATCH 335/546] globus store options --- .../source/installation/config.rst | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7b32da8f6c3..e0e4d4cd89e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -499,14 +499,14 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores ------------------------------------------------------------------------------------------------------ +File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores +-------------------------------------------------------------------------------------------------------------------------- By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. -A Dataverse installation can alternately store files in a Swift or S3-compatible object store, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. 
+A Dataverse installation can alternately store files in a Swift or S3-compatible object store, or on a Globus endpoint, and can now be configured to support multiple stores at once. With a multi-store configuration, the location for new files can be controlled on a per-Dataverse collection basis. -A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web-accessible trusted remote store. +A Dataverse installation may also be configured to reference some files (e.g. large and/or sensitive data) stored in a web or Globus accessible trusted remote store. A Dataverse installation can be configured to allow out of band upload by setting the ``dataverse.files.\.upload-out-of-band`` JVM option to ``true``. By default, Dataverse supports uploading files via the :ref:`add-file-api`. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -958,7 +958,7 @@ Once you have configured a trusted remote store, you can point your users to the dataverse.files..type ``remote`` **Required** to mark this storage as remote. (none) dataverse.files..label **Required** label to be shown in the UI for this storage. (none) dataverse.files..base-url **Required** All files must have URLs of the form /* . (none) - dataverse.files..base-store **Optional** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) dataverse.files..download-redirect ``true``/``false`` Enable direct download (should usually be true). ``false`` dataverse.files..secret-key A key used to sign download requests sent to the remote store. Optional. (none) dataverse.files..url-expiration-minutes If direct downloads and using signing: time until links expire. Optional. 60 @@ -967,6 +967,46 @@ Once you have configured a trusted remote store, you can point your users to the =========================================== ================== ========================================================================== =================== +.. _globus-storage: + +Globus Storage +++++++++++++++ + +Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). +See :doc:`/developers/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. + +In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). +These and other available options are described in the table below. 
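+
+As a minimal, illustrative sketch (the store id ``globus1``, the label, and the endpoint value below are placeholders; these and the remaining options are explained in the table and the paragraphs that follow), a Globus store is declared with per-store JVM options in the same way as the other store types:
+
+.. code-block:: bash
+
+    # A sketch only - adjust the store id and values for your installation
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.type=globus"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.label=GlobusData"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.base-store=file"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.managed=true"
+    ./asadmin create-jvm-options "-Ddataverse.files.globus1.transfer-endpoint-with-basepath=<endpoint-id>/<base-path>"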
+ +There are two types of Globus stores +- managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse +- remote - where Dataverse references files that remain on trusted remote Globus endpoints + +For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector. +With the former, Dataverse has no direct access to the file contents and functionality related to ingest, fixity hash validation, etc. are not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. + +Once you have configured a globus store, it is recommended that you install the `dataverse-globus app `_ to allow transfers in/out of Dataverse to be initated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support. + +.. table:: + :align: left + + ======================================================= ================== ========================================================================== =================== + JVM Option Value Description Default value + ======================================================= ================== ========================================================================== =================== + dataverse.files..type ``globus`` **Required** to mark this storage as globus enabled. (none) + dataverse.files..label **Required** label to be shown in the UI for this storage. (none) + dataverse.files..base-store **Required** The id of a base store (of type file, s3, or swift). (the default store) + dataverse.files..remote-store-name A short name used in the UI to indicate where a file is located. Optional. (none) + dataverse.files..remote-store-url A url to an info page about the remote store used in the UI. Optional. (none) + dataverse.files..managed ``true``/``false`` Whether dataverse manages an associated Globus endpoint ``false`` + dataverse.files..transfer-endpoint-with-basepath The *managed* Globus endpoint id and associated base path for file storage (none) + dataverse.files..globus-token A Globus token (base64 endcoded : + for a managed store) - using a microprofile alias is recommended (none) + dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + + ======================================================= ================== ========================================================================== =================== + .. 
_temporary-file-storage: Temporary Upload File Storage From 4d7818a7be615033bd00261a6a0951c703c0ad3b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:04 -0500 Subject: [PATCH 336/546] merge miss --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 13ec049fa0a..8afc365417e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -87,7 +87,7 @@ /* Amazon AWS S3 driver */ -public class S3AccessIO extends StorageIO implements GlobusAccessibleStore { +public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); @@ -1194,7 +1194,6 @@ private static AmazonS3 getClient(String driverId) { * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = getConfigParamForDriver(driverId, PROFILE,"default"); ArrayList providers = new ArrayList<>(); From ceacf7e92c045a61b96205536f442dc48142cb2a Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 5 Dec 2023 16:59:40 -0500 Subject: [PATCH 337/546] add a stub globus api page since it is referenced in the config doc --- .../source/developers/globus-api.rst | 282 ++++++++++++++++++ doc/sphinx-guides/source/developers/index.rst | 1 + 2 files changed, 283 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/globus-api.rst diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst new file mode 100644 index 00000000000..2775ffd2142 --- /dev/null +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -0,0 +1,282 @@ +Globus Transfer API +=================== + +The Globus API addresses three use cases: +* Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) +* Reference of files that will remain in a remote Globus endpoint +* Transfer from a Dataverse-managed Globus endpoint + +The ability for Dataverse to interact with Globus endpoints is configured via +Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: + +* Requesting initiation of a transfer from the server +* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 +* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer + +This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. +Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) 
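+
+As a rough end-to-end sketch (all ids, URLs, and tokens below are placeholders; the exact calls, parameters, and responses are documented in the sections that follow), the three activities look like this:
+
+.. code-block:: bash
+
+    export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+    export SERVER_URL=https://demo.dataverse.org
+    export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV
+
+    # 1. Request upload URL(s) for a file of a given size
+    curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=1000000000"
+
+    # 2. PUT the file bytes to the pre-signed URL returned in step 1 (single-part case)
+    curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T file1.txt "<url from step 1>"
+
+    # 3. Register the uploaded file with the dataset (or abort the transfer), as described below
+    curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=<see below>"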
+ + +Requesting Direct Upload of a DataFile +-------------------------------------- +To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export SIZE=1000000000 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" + +The response to this call, assuming direct uploads are enabled, will be one of two forms: + +Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "url":"...", + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + } + } + +Multiple URLs: when the file must be uploaded in multiple parts. The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB + +.. code-block:: bash + + { + "status":"OK", + "data":{ + "urls":{ + "1":"...", + "2":"...", + "3":"...", + "4":"...", + "5":"..." + } + "abort":"/api/datasets/mpupload?...", + "complete":"/api/datasets/mpupload?..." + "partSize":1073741824, + "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" + } + +In the example responses above, the URLs, which are very long, have been omitted. These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... + +The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + +In the single part case, only one call to the supplied URL is required: + +.. code-block:: bash + + curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" + + +In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. +The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. + +To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: + +.. code-block:: bash + + curl -X PUT "$SERVER_URL/api/datasets/mpload?..." -d '{"1":"","2":"","3":"","4":"","5":""}' + +If the client is unable to complete the multipart upload, it should call the abort URL: + +.. code-block:: bash + + curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." + + +.. _direct-add-to-dataset-api: + +Adding the Uploaded file to the Dataset +--------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. 
For direct uploads, the jsonData object must also include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +To add multiple Uploaded Files to the Dataset +--------------------------------------------- + +Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: + +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ + {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + + +Replacing an existing file in the Dataset +----------------------------------------- + +Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. +jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. For direct uploads, the jsonData object must include values for: + +* "storageIdentifier" - String, as specified in prior calls +* "fileName" - String +* "mimeType" - String +* fixity/checksum: either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. +Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export FILE_IDENTIFIER=5072 + export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + +Replacing multiple existing files in the Dataset +------------------------------------------------ + +Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. +jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: + +* "fileToReplaceId" - the id of the file being replaced +* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) +* "description" - A description of the file +* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset +* "storageIdentifier" - String +* "fileName" - String +* "mimeType" - String +* "fixity/checksum" either: + + * "md5Hash" - String with MD5 hash value, or + * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + + +The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV + export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' + + curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + +The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + +.. code-block:: + + { + "status": "OK", + "data": { + "Files": [ + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", + "fileDetails": { + "fileToReplaceId": 10, + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", + "fileName": "file1.Bin", + "mimeType": "application/octet-stream", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } + } + }, + { + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "successMessage": "Replaced successfully in the dataset", + "fileDetails": { + "description": "My description.", + "label": "file2.txt", + "restricted": false, + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "dataFile": { + "persistentId": "", + "pidURL": "", + "filename": "file2.txt", + "contentType": "text/plain", + "filesize": 2407, + "description": "My description.", + "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", + "rootDataFileId": 11, + "previousDataFileId": 11, + "checksum": { + "type": "SHA-1", + "value": "123789" + } + } + } + } + ], + "Result": { + "Total number of files": 2, + "Number of files successfully replaced": 1 + } + } + } + + +Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. +With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
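The curl-based /replaceFiles example above can also be scripted. The following is a minimal, hypothetical Python 3 sketch of the same call; the endpoint path, the X-Dataverse-key header, and the multipart "jsonData" field are taken from the documentation above, while the token, server URL, persistent identifier, and storage identifier are placeholder values, not tested ones.

.. code-block:: python

    # Hypothetical sketch: drive the /replaceFiles call shown above with the
    # Python "requests" library instead of curl. All credential and identifier
    # values are placeholders copied from the examples above.
    import json
    import requests

    API_TOKEN = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
    SERVER_URL = "https://demo.dataverse.org"
    PERSISTENT_IDENTIFIER = "doi:10.5072/FK2/7U7YBV"

    json_data = [
        {
            "fileToReplaceId": 10,
            "description": "My description.",
            "directoryLabel": "data/subdir1",
            "categories": ["Data"],
            "restrict": "false",
            "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42",
            "fileName": "file1.txt",
            "mimeType": "text/plain",
            "checksum": {"@type": "SHA-1", "@value": "123456"},
        }
    ]

    response = requests.post(
        f"{SERVER_URL}/api/datasets/:persistentId/replaceFiles",
        params={"persistentId": PERSISTENT_IDENTIFIER},
        headers={"X-Dataverse-key": API_TOKEN},
        # Send jsonData as a multipart form field, matching "-F jsonData=..." in curl.
        files={"jsonData": (None, json.dumps(json_data))},
    )
    print(response.status_code, response.text)

As with the curl version, the returned JSON should be inspected for the per-file success or error messages illustrated in the sample response above.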
diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 60d97feeef9..458a78a6c95 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -39,6 +39,7 @@ Developer Guide big-data-support aux-file-support s3-direct-upload-api + globus-api dataset-semantic-metadata-api dataset-migration-api workflows From 03a4c77155934060c33c33ed27ea2f7628301e91 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:58:33 +0000 Subject: [PATCH 338/546] Refactor: shortcut on datafile permission check --- .../harvard/iq/dataverse/PermissionServiceBean.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 2e4627576c6..107024bcfb9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -851,11 +851,13 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio if (user.isSuperuser()) { return true; } - if (hasReleasedFiles(datasetVersion)) { + if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { - if (userOn(user, fileMetadata.getDataFile()).has(Permission.DownloadFile)) { + DataFile dataFile = fileMetadata.getDataFile(); + Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } @@ -863,7 +865,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio } /** - * Checks if a DatasetVersion has released files. + * Checks if a DatasetVersion has unrestricted released files. 
* * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing * the search on a particular file, it searches for the total number of files in the target version that are present @@ -872,7 +874,7 @@ public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersio * @param targetDatasetVersion DatasetVersion to check * @return boolean indicating whether the dataset version has released files or not */ - private boolean hasReleasedFiles(DatasetVersion targetDatasetVersion) { + private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) { Dataset targetDataset = targetDatasetVersion.getDataset(); if (!targetDataset.isReleased()) { return false; From 326b784da752091bf4c7b3bf4112ebfc327acb69 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 10:59:08 +0000 Subject: [PATCH 339/546] Refactor: variable extracted in isPublicallyDownloadable --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 107024bcfb9..1c568e83143 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -451,8 +451,9 @@ private boolean isPublicallyDownloadable(DvObject dvo) { if (!df.isRestricted()) { if (df.getOwner().getReleasedVersion() != null) { - if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) { - for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) { + List fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas(); + if (fileMetadatas != null) { + for (FileMetadata fm : fileMetadatas) { if (df.equals(fm.getDataFile())) { return true; } From 16c685dc30601d8a8b0140cec4b8621e1fe33a99 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:06 +0000 Subject: [PATCH 340/546] Changed: passing DataverseRequest instead of User to canDownloadAtLeastOneFile --- .../harvard/iq/dataverse/PermissionServiceBean.java | 11 ++++++----- .../java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 1c568e83143..e87809ada56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -844,20 +844,21 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio /** * Checks if a User can download at least one file of the target DatasetVersion. 
* - * @param user User to check + * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check * @return boolean indicating whether the user can download at least one file or not */ - public boolean canDownloadAtLeastOneFile(User user, DatasetVersion datasetVersion) { - if (user.isSuperuser()) { + public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { + if (dataverseRequest.getUser().isSuperuser()) { return true; } if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } - for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(user, dataFile)); + Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index a9cfefc33d8..6a1e11e690b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4145,7 +4145,7 @@ public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext cr @Context HttpHeaders headers) { return response(req -> { DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); - return ok(permissionService.canDownloadAtLeastOneFile(getRequestUser(crc), datasetVersion)); + return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } } From 8ca2338723a0ec1a57a9affc923fe65229009909 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:22:51 +0000 Subject: [PATCH 341/546] Fixed: method doc --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index e87809ada56..359e8823fce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -842,7 +842,7 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio } /** - * Checks if a User can download at least one file of the target DatasetVersion. + * Checks if a DataverseRequest can download at least one file of the target DatasetVersion. 
* * @param dataverseRequest DataverseRequest to check * @param datasetVersion DatasetVersion to check From 96cd5c9d55437180cfa256df38b0d5990c97ec6c Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:24:49 +0000 Subject: [PATCH 342/546] Added: explanatory comment --- .../java/edu/harvard/iq/dataverse/PermissionServiceBean.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 359e8823fce..6dc943f1ca8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -852,6 +852,7 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data if (dataverseRequest.getUser().isSuperuser()) { return true; } + // This is a shortcut to avoid having to check version files if the condition is met if (hasUnrestrictedReleasedFiles(datasetVersion)) { return true; } From 3c1820b060b303da2bfa97132667ceccb5d5e977 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 11:48:09 +0000 Subject: [PATCH 343/546] Added: includeDeaccessioned query param to getCanDownloadAtLeastOneFile API endpoint --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 6a1e11e690b..579f4f78fe1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4141,10 +4141,11 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); }, getRequestUser(crc)); } From 811d79a7f8d017745fcfd782b233ec583d3669e2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 08:33:38 -0500 Subject: [PATCH 344/546] change minio access key, more l33t #6783 --- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 98376e255dd..e68215d53d2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -212,7 +212,7 @@ services: - minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k35 + MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y command: server /data networks: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1306c30d9c1..41446349093 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -55,7 +55,7 @@ public static void setUp() { .withEndpointConfiguration(new 
EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k35"; + String secretKeyMinio = "s3cr3t_4cc355_k3y"; s3minio = AmazonS3ClientBuilder.standard() // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local .withPathStyleAccessEnabled(Boolean.TRUE) From e9a670c8620c068419080aad25421afa04641958 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 10:39:09 -0500 Subject: [PATCH 345/546] collection not DB #10101 --- doc/sphinx-guides/source/qa/performance-tests.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index f433226d4ff..447c4f6c54d 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -20,4 +20,4 @@ Please note the performance database is also used occasionally by Julian and the Executing the Performance Script -------------------------------- -To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the DB and ds and works with python3. +To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset and works with Python 3. From a81ad72a0896073e043ee57848e571d7a3754a8a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 10:50:46 -0500 Subject: [PATCH 346/546] comment out optional listing of buckets #6783 --- .../harvard/iq/dataverse/api/S3AccessIT.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 41446349093..74150ca120a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -62,16 +62,15 @@ public static void setUp() { .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyMinio, secretKeyMinio))) .withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build(); - System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3localstack.listBuckets()) { - System.out.println("bucket: " + bucket); - } - - System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); - for (Bucket bucket : s3minio.listBuckets()) { - System.out.println("bucket: " + bucket); - } - +// System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3localstack.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } +// +// System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3minio.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } // create bucket if it doesn't exist // Note that we create the localstack bucket with conf/localstack/buckets.sh // because we haven't figured out how to create it properly in Java. 
From 0bd9f139e5dca2851ca88ed12c5e31af9c5bbfe9 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:01:04 -0500 Subject: [PATCH 347/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index c2b52ab34b8..06a3e01f7af 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -1,6 +1,6 @@ # Dataverse 6.1 -(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.1 rather than the list of releases, which will cut them off. This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. From c97d7b55e2932dacaa19e4e3ac403c88a25bd2ee Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:01:56 -0500 Subject: [PATCH 348/546] globus api doc --- .../source/developers/globus-api.rst | 348 ++++++++---------- 1 file changed, 149 insertions(+), 199 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2775ffd2142..6a94f220dc2 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -6,277 +6,227 @@ The Globus API addresses three use cases: * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint -The ability for Dataverse to interact with Globus endpoints is configured via -Direct upload involves a series of three activities, each involving interacting with the server for a Dataverse installation: +The ability for Dataverse to interact with Globus endpoints is configured via a Globus store - see :ref:`globus-storage`. -* Requesting initiation of a transfer from the server -* Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3 -* A call to the server to register the file/files as part of the dataset/replace a file in the dataset or to cancel the transfer +Globus transfers (or referencing a remote endpoint) for upload and download transfers involve a series of steps. These can be accomplished using the Dataverse and Globus APIs. (These are used internally by the `dataverse-globus app `_ when transfers are done via the Dataverse UI.) -This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. -Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.) 
+Requesting Upload or Download Parameters +---------------------------------------- - -Requesting Direct Upload of a DataFile --------------------------------------- -To initiate a transfer of a file to S3, make a call to the Dataverse installation indicating the size of the file to upload. The response will include a pre-signed URL(s) that allow the client to transfer the file. Pre-signed URLs include a short-lived token authorizing the action represented by the URL. +The first step in preparing for a Globus transfer/reference operation is to request the parameters relevant for a given dataset: .. code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export SIZE=1000000000 - - curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/uploadurls?persistentId=$PERSISTENT_IDENTIFIER&size=$SIZE" - -The response to this call, assuming direct uploads are enabled, will be one of two forms: + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusUploadParameters?locale=$LOCALE" -Single URL: when the file is smaller than the size at which uploads must be broken into multiple parts +The response will be of the form: .. code-block:: bash { - "status":"OK", - "data":{ - "url":"...", - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883619b8-892ca9f7112e" + "status": "OK", + "data": { + "queryParameters": { + "datasetId": 29, + "siteUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com", + "datasetVersion": ":draft", + "dvLocale": "en", + "datasetPid": "doi:10.5072/FK2/ILLPXE", + "managed": "true", + "endpoint": "d8c42580-6528-4605-9ad8-116a61982644" + }, + "signedUrls": [ + { + "name": "requestGlobusTransferPaths", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/requestGlobusUploadPaths?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=63ac4bb748d12078dded1074916508e19e6f6b61f64294d38e0b528010b07d48783cf2e975d7a1cb6d4a3c535f209b981c7c6858bc63afdfc0f8ecc8a139b44a", + "timeOut": 300 + }, + { + "name": "addGlobusFiles", + "httpMethod": "POST", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/addGlobusFiles?until=2023-11-22T01:52:03.648&user=dataverseAdmin&method=POST&token=2aaa03f6b9f851a72e112acf584ffc0758ed0cc8d749c5a6f8c20494bb7bc13197ab123e1933f3dde2711f13b347c05e6cec1809a8f0b5484982570198564025", + "timeOut": 300 + }, + { + "name": "getDatasetMetadata", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft?until=2023-11-22T01:52:03.649&user=dataverseAdmin&method=GET&token=1878d6a829cd5540e89c07bdaf647f1bea5314cc7a55433b0b506350dd330cad61ade3714a8ee199a7b464fb3b8cddaea0f32a89ac3bfc4a86cd2ea3004ecbb8", + "timeOut": 300 + }, + { + "name": "getFileListing", + "httpMethod": "GET", + "signedUrl": "http://ec2-34-204-169-194.compute-1.amazonaws.com/api/v1/datasets/29/versions/:draft/files?until=2023-11-22T01:52:03.650&user=dataverseAdmin&method=GET&token=78e8ca8321624f42602af659227998374ef3788d0feb43d696a0e19086e0f2b3b66b96981903a1565e836416c504b6248cd3c6f7c2644566979bd16e23a99622", + "timeOut": 300 + } + ] + } } - } -Multiple URLs: when the file must be uploaded in multiple parts. 
The part size is set by the Dataverse installation and, for AWS-based storage, range from 5 MB to 5 GB +The response includes the id for the Globus endpoint to use along with several signed URLs. -.. code-block:: bash +The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. - { - "status":"OK", - "data":{ - "urls":{ - "1":"...", - "2":"...", - "3":"...", - "4":"...", - "5":"..." - } - "abort":"/api/datasets/mpupload?...", - "complete":"/api/datasets/mpupload?..." - "partSize":1073741824, - "storageIdentifier":"s3://demo-dataverse-bucket:177883b000e-49cedef268ac" - } +If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the +requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are +describe further below. + +The call to set up for a transfer out (download) is similar: -In the example responses above, the URLs, which are very long, have been omitted. These URLs reference the S3 server and the specific object identifier that will be used, starting with, for example, https://demo-dataverse-bucket.s3.amazonaws.com/10.5072/FK2FOQPJS/177883b000e-49cedef268ac?... +.. code-block:: bash -The client must then use the URL(s) to PUT the file, or if the file is larger than the specified partSize, parts of the file. + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/globusDownloadParameters?locale=$LOCALE" -In the single part case, only one call to the supplied URL is required: +Note that this API call supports an additional downloadId query parameter. This is only used when the globus-dataverse app is called from the Dataverse user interface. There is no need to use it when calling the API directly. -.. code-block:: bash +The returned response includes the same getDatasetMetadata and getFileListing URLs as in the upload case and includes "monitorGlobusDownload" and "requestGlobusDownload" URLs. The response will also indicate whether the store is "managed" and will provide the "endpoint" from which downloads can be made. - curl -H 'x-amz-tagging:dv-state=temp' -X PUT -T "" +Performing an Upload/Transfer In +-------------------------------- -In the multipart case, the client must send each part and collect the 'eTag' responses from the server. The calls for this are the same as the one for the single part case except that each call should send a slice of the total file, with the last part containing the remaining bytes. -The responses from the S3 server for these calls will include the 'eTag' for the uploaded part. +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). -To successfully conclude the multipart upload, the client must call the 'complete' URI, sending a json object including the part eTags: +Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. These both reference the same API call but must be used with different entries in the JSON body sent: .. code-block:: bash - curl -X PUT "$SERVER_URL/api/datasets/mpload?..." 
-d '{"1":"","2":"","3":"","4":"","5":""}' - -If the client is unable to complete the multipart upload, it should call the abort URL: + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export LOCALE=en-US + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" -.. code-block:: bash - - curl -X DELETE "$SERVER_URL/api/datasets/mpload?..." - +Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. -.. _direct-add-to-dataset-api: +In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: -Adding the Uploaded file to the Dataset ---------------------------------------- +.. code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "numberOfFiles":2 + } -Once the file exists in the s3 bucket, a final API call is needed to add it to the Dataset. This call is the same call used to upload a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: +In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +.. code-block:: bash + { + "referencedFiles":[ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + ] + } + +The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: +.. code-block:: bash - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings + { + "status":"OK", + "data":{ + "globusm://18b49d3688c-62137dcb06e4":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-62137dcb06e4", + "globusm://18b49d3688c-5c17d575e820":"/hdc1/10.5072/FK2/ILLPXE/18b49d3688c-5c17d575e820" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, the specified Globus principal is granted write permission to the specified endpoint/path, +which will allow initiation of a transfer from the external endpoint to the managed endpoint using the Globus API. +The permission will be revoked if the transfer is not started and the next call to Dataverse to finish the transfer are not made within a short time (configurable, default of 5 minutes). + +In the remote/reference case, the map is from the initially supplied endpoint/paths to the new assigned file storageidentifiers: .. 
code-block:: bash - export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export JSON_DATA="{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}" - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. - -To add multiple Uploaded Files to the Dataset ---------------------------------------------- + { + "status":"OK", + "data":{ + "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt":"globus://18bf8c933f4-ed2661e7d19b//d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" + } + } -Once the files exists in the s3 bucket, a final API call is needed to add all the files to the Dataset. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is an array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must also include values for: -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Adding Files to the Dataset +--------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +In the managed case, once a Globus transfer has been initiated a final API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: .. 
code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA="[{'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42', 'fileName':'file1.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123456'}}, \ - {'description':'My description.','directoryLabel':'data/subdir1','categories':['Data'], 'restrict':'false', 'storageIdentifier':'s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53', 'fileName':'file2.txt', 'mimeType':'text/plain', 'checksum': {'@type': 'SHA-1', '@value': '123789'}}]" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. +As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. -Replacing an existing file in the Dataset ------------------------------------------ +Once the transfer completes, Dataverse will remove the write permission for the principal. -Once the file exists in the s3 bucket, a final API call is needed to register it as a replacement of an existing file. This call is the same call used to replace a file to a Dataverse installation but, rather than sending the file bytes, additional metadata is added using the "jsonData" parameter. -jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, whether to allow the mimetype to change (forceReplace=true), etc. 
For direct uploads, the jsonData object must include values for: +Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. -* "storageIdentifier" - String, as specified in prior calls -* "fileName" - String -* "mimeType" - String -* fixity/checksum: either: +In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +Downloading/Transfer Out Via Globus +----------------------------------- -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512. -Note that the API call does not validate that the file matches the hash value supplied. If a Dataverse instance is configured to validate file fixity hashes at publication time, a mismatch would be caught at that time and cause publication to fail. +To begin downloading files, the requestGlobusDownload URL is used: .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export FILE_IDENTIFIER=5072 - export JSON_DATA='{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "forceReplace":"true", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/files/$FILE_IDENTIFIER/replace" -F "jsonData=$JSON_DATA" + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV -Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusDownload" -Replacing multiple existing files in the Dataset ------------------------------------------------- +The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: -Once the replacement files exist in the s3 bucket, a final API call is needed to register them as replacements for existing files. In this API call, additional metadata is added using the "jsonData" parameter. -jsonData for this call is array of objects that normally include information such as a file description, tags, provenance, whether the file is restricted, etc. For direct uploads, the jsonData object must include some additional values: +.. 
code-block:: bash + { + "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", + "fileIds":[60, 61] + } + +Note that this API call takes an optional downloadId parameter that is used with the dataverse-globus app. When downloadId is included, the list of fileIds is not needed. -* "fileToReplaceId" - the id of the file being replaced -* "forceReplace" - whether to replace a file with one of a different mimetype (optional, default is false) -* "description" - A description of the file -* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset -* "storageIdentifier" - String -* "fileName" - String -* "mimeType" - String -* "fixity/checksum" either: +The response is a JSON object mapping the requested file Ids to Globus endpoint/paths. In the managed case, the principal will have been given read permissions for the specified paths: - * "md5Hash" - String with MD5 hash value, or - * "checksum" - Json Object with "@type" field specifying the algorithm used and "@value" field with the value from that algorithm, both Strings +.. code-block:: bash + { + "status":"OK", + "data":{ + "60": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-92b8e168090e", + "61": "d8c42580-6528-4605-9ad8-116a61982644/hdc1/10.5072/FK2/ILLPXE/18bf3af9c78-c8d81569305c" + } + } -The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512 +For the remote case, the use can perform the transfer without further contact with Dataverse. In the managed case, the user must initiate the transfer via the Globus API and then inform Dataverse. +Dataverse will then monitor the transfer and revoke the read permission when the transfer is complete. (Not making this last call could result in failure of the transfer.) .. code-block:: bash export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org - export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV - export JSON_DATA='[{"fileToReplaceId": 10, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},{"fileToReplaceId": 11, "forceReplace": true, "description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]' - - curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/replaceFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA" - -The JSON object returned as a response from this API call includes a "data" that indicates how many of the file replacements succeeded and provides per-file error messages for those that don't, e.g. + export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV + + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/monitorGlobusDownload" + +The JSON body sent just contains the task identifier for the transfer: -.. code-block:: +.. 
code-block:: bash { - "status": "OK", - "data": { - "Files": [ - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "errorMessage": "Bad Request:The file to replace does not belong to this dataset.", - "fileDetails": { - "fileToReplaceId": 10, - "description": "My description.", - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "restrict": "false", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", - "fileName": "file1.Bin", - "mimeType": "application/octet-stream", - "checksum": { - "@type": "SHA-1", - "@value": "123456" - } - } - }, - { - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "successMessage": "Replaced successfully in the dataset", - "fileDetails": { - "description": "My description.", - "label": "file2.txt", - "restricted": false, - "directoryLabel": "data/subdir1", - "categories": [ - "Data" - ], - "dataFile": { - "persistentId": "", - "pidURL": "", - "filename": "file2.txt", - "contentType": "text/plain", - "filesize": 2407, - "description": "My description.", - "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", - "rootDataFileId": 11, - "previousDataFileId": 11, - "checksum": { - "type": "SHA-1", - "value": "123789" - } - } - } - } - ], - "Result": { - "Total number of files": 2, - "Number of files successfully replaced": 1 - } - } + "taskIdentifier":"b5fd01aa-8963-11ee-83ae-d5484943e99a" } + - -Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exists in S3/has been uploaded via some out-of-band method. Enabling out-of-band uploads is described at :ref:`file-storage` in the Configuration Guide. -With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifier must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above. 
From c7d73f64177745fa7892543407025f9130dcb83b Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:25:22 -0500 Subject: [PATCH 349/546] default for globus-cache-maxage --- src/main/resources/META-INF/microprofile-config.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 504b5e46735..ec8427795ee 100644 --- a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -18,6 +18,7 @@ dataverse.build= dataverse.files.directory=${STORAGE_DIR:/tmp/dataverse} dataverse.files.uploads=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/uploads dataverse.files.docroot=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/docroot +dataverse.files.globus-cache-maxage=5 # SEARCH INDEX dataverse.solr.host=localhost From 1fb7ddf6d89a1b36f9a059f016ac617aa6ec3758 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 11:27:42 -0500 Subject: [PATCH 350/546] fix spacing --- doc/sphinx-guides/source/developers/globus-api.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 6a94f220dc2..5b2b6982866 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -105,6 +105,7 @@ Note that when using the dataverse-globus app or the return from the previous ca In the managed case, the JSON body sent must include the id of the Globus user that will perform the transfer and the number of files that will be transferred: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "numberOfFiles":2 @@ -113,6 +114,7 @@ In the managed case, the JSON body sent must include the id of the Globus user t In the remote reference case, the JSON body sent must include the Globus endpoint/paths that will be referenced: .. code-block:: bash + { "referencedFiles":[ "d8c42580-6528-4605-9ad8-116a61982644/hdc1/test1.txt" @@ -120,6 +122,7 @@ In the remote reference case, the JSON body sent must include the Globus endpoin } The response will include a JSON object. In the managed case, the map is from new assigned file storageidentifiers and specific paths on the managed Globus endpoint: + .. code-block:: bash { @@ -161,7 +164,6 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. 
@@ -191,6 +193,7 @@ To begin downloading files, the requestGlobusDownload URL is used: The JSON body sent should include a list of file ids to download and, for a managed endpoint, the Globus principal that will make the transfer: .. code-block:: bash + { "principal":"d15d4244-fc10-47f3-a790-85bdb6db9a75", "fileIds":[60, 61] From c2ad0092c545a41f071129bcd85c398775a53a1e Mon Sep 17 00:00:00 2001 From: sbondka Date: Wed, 6 Dec 2023 17:28:40 +0100 Subject: [PATCH 351/546] Add modifications --- .../source/_static/admin/dataverse-external-tools.tsv | 1 + doc/sphinx-guides/source/admin/integrations.rst | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 4f4c29d0670..ba60be59227 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,3 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. +JupyterHub explore file The `Dataverse-to-JupyterHub Data Transfer Connector `_ is a tool that simplifies the transfer of data between Dataverse repositories and the cloud-based platform JupyterHub. It is designed for researchers, scientists, and data analysts, facilitating collaboration on projects by seamlessly moving datasets and files. The tool is a lightweight client-side web application built using React and relies on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems. Currently optimized for small to medium-sized files, future plans include extending support for larger files and signed Dataverse endpoints. 
For more details, you can refer to the external tool manifest: https://forgemia.inra.fr/dipso/eosc-pillar/dataverse-jupyterhub-connector/-/blob/master/externalTools.json
diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst
index a9b962f33ca..ed3860a9ca1 100644
--- a/doc/sphinx-guides/source/admin/integrations.rst
+++ b/doc/sphinx-guides/source/admin/integrations.rst
@@ -188,12 +188,12 @@ Researchers can use a Google Sheets add-on to search for Dataverse installation'
 JupyterHub
 ++++++++++
 
-The Dataverse-to-JupyterHub Data Transfer Connector streamlines data transfer between Dataverse repositories and the cloud-based platform JupyterHub, enhancing collaborative research.
+The `Dataverse-to-JupyterHub Data Transfer Connector `_ streamlines data transfer between Dataverse repositories and the cloud-based platform JupyterHub, enhancing collaborative research.
 This connector facilitates seamless two-way transfer of datasets and files, emphasizing the potential of an integrated research environment.
 It is a lightweight client-side web application built using React and relying on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems.
 Currently, it supports small to medium-sized files, with plans to enable support for large files and signed Dataverse endpoints in the future.
 
 What kind of user is the feature intended for?
-The feature is intended for reasearchers, scientists and data analyst working with Dataverse instances and JupyterHub looking to ease the data transfer process.
+The feature is intended for researchers, scientists, and data analysts who work with Dataverse instances and JupyterHub and want to ease the data transfer process.
 
 .. _integrations-discovery:

From a9a8f0cadec9bc3b31f0546805c46cdbf578aef1 Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Wed, 6 Dec 2023 11:37:06 -0500
Subject: [PATCH 352/546] clarify it's pages we're hitting #10101

---
 doc/sphinx-guides/source/qa/performance-tests.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md
index 447c4f6c54d..ad7972bd75e 100644
--- a/doc/sphinx-guides/source/qa/performance-tests.md
+++ b/doc/sphinx-guides/source/qa/performance-tests.md
@@ -20,4 +20,4 @@ Please note the performance database is also used occasionally by Julian and the
 Executing the Performance Script
 --------------------------------
 
-To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset and works with Python 3.
+To execute the performance test script, you need to install a local copy of the database-helper-scripts project at . We have since produced a stripped-down script that calls just the collection and dataset pages and works with Python 3.
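The stripped-down performance script mentioned in the patch above is not included in this document, so the following is only a hypothetical Python 3 sketch of the idea it describes: timing requests to a collection page and a dataset page. The base URL, collection alias, and dataset PID are assumed placeholder values, and the real helper script may be structured differently.

```python
# Hypothetical sketch of a stripped-down page-timing check: fetch a collection page
# and a dataset page and report how long each request took. The URLs below are
# placeholders; the real helper script may differ.
import time
import requests

BASE_URL = "https://demo.dataverse.org"  # assumed target installation
PAGES = {
    "collection page": f"{BASE_URL}/dataverse/root",  # assumed collection alias
    "dataset page": f"{BASE_URL}/dataset.xhtml?persistentId=doi:10.5072/FK2/EXAMPLE",  # assumed PID
}

for name, url in PAGES.items():
    start = time.perf_counter()
    response = requests.get(url, timeout=60)
    elapsed = time.perf_counter() - start
    print(f"{name}: HTTP {response.status_code} in {elapsed:.2f}s, {len(response.content)} bytes")
```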
From 6fee16dec8125390ea6aa7221a19fde0db2b9730 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 6 Dec 2023 11:52:24 -0500 Subject: [PATCH 353/546] #10151 incorporate json schema --- doc/release-notes/6.1-release-notes.md | 6 +++++- doc/release-notes/9464-json-validation.md | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 doc/release-notes/9464-json-validation.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 06a3e01f7af..990ba219cad 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -24,7 +24,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] +For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -45,6 +45,8 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. +- getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -112,6 +114,8 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. 
In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) + ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md deleted file mode 100644 index f104263ba35..00000000000 --- a/doc/release-notes/9464-json-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) - -For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 15e80aa4c847cb5ce8574fe600723c9cc81a5bc2 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 16:56:37 +0000 Subject: [PATCH 354/546] Fixed: roleAssignees setup in canDownloadAtLeastOneFile --- .../edu/harvard/iq/dataverse/PermissionServiceBean.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 6dc943f1ca8..471cac31e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -849,7 +849,8 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio * @return boolean indicating whether the user can download at least one file or not */ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { - if (dataverseRequest.getUser().isSuperuser()) { + User user = dataverseRequest.getUser(); + if (user.isSuperuser()) { return true; } // This is a shortcut to avoid having to check version files if the condition is met @@ -859,8 +860,9 @@ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, Data List fileMetadatas = datasetVersion.getFileMetadatas(); for (FileMetadata fileMetadata : fileMetadatas) { DataFile dataFile = fileMetadata.getDataFile(); - Set ras = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); - if (hasGroupPermissionsFor(ras, dataFile, EnumSet.of(Permission.DownloadFile))) { + Set roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); + roleAssignees.add(user); + if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) { return true; } } From 4b71b36305fb6c18f7282530dc4491976a352936 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 6 Dec 2023 17:02:07 +0000 Subject: [PATCH 355/546] Added: IT for getCanDownloadAtLeastOneFile endpoint --- .../harvard/iq/dataverse/api/DatasetsIT.java | 71 +++++++++++++++---- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 945b741a94b..3510f2c06ef 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -80,7 +80,6 @@ import javax.xml.stream.XMLStreamReader; import static java.lang.Thread.sleep; -import static org.junit.jupiter.api.Assertions.assertEquals; import org.hamcrest.CoreMatchers; @@ -90,11 +89,7 @@ import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.contains; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.*; public class DatasetsIT { @@ -4123,10 +4118,10 @@ public void testGetUserPermissionsOnDataset() { } @Test - public void testGetCanDownloadAtLeastOneFile() { - Response createUser = UtilIT.createRandomUser(); - createUser.then().assertThat().statusCode(OK.getStatusCode()); - String apiToken = UtilIT.getApiTokenFromResponse(createUser); + public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + Response createUserResponse = UtilIT.createRandomUser(); + createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); @@ -4135,15 +4130,65 @@ public void testGetCanDownloadAtLeastOneFile() { Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPersistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); - // Call with valid dataset id - Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, apiToken); + // Upload file + String pathToTestFile = "src/test/resources/images/coffeeshop.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToTestFile, Json.createObjectBuilder().build(), apiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id"); + + // Publish dataset version + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Create a second user to call the getCanDownloadAtLeastOneFile method + Response createSecondUserResponse = UtilIT.createRandomUser(); + createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + String secondUserApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + String 
secondUserUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse); + + // Call with a valid dataset id when a file is released + Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); assertTrue(canDownloadAtLeastOneFile); + // Restrict file + Response restrictFileResponse = UtilIT.restrictFile(fileId, true, apiToken); + restrictFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Publish dataset version + publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Make sure the dataset is published + Thread.sleep(3000); + + // Call with a valid dataset id when a file is restricted and the user does not have access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertFalse(canDownloadAtLeastOneFile); + + // Grant restricted file access to the user + Response grantFileAccessResponse = UtilIT.grantFileAccess(fileId, "@" + secondUserUsername, apiToken); + grantFileAccessResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Call with a valid dataset id when a file is restricted and the user has access + canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); + canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data"); + assertTrue(canDownloadAtLeastOneFile); + // Call with invalid dataset id - Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, apiToken); + Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, secondUserApiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } } From 6d2f87ca93c108a9b4ec4905372a2e1709b3f5cf Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:24:26 -0500 Subject: [PATCH 356/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 990ba219cad..4b5c20f3953 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -12,8 +12,8 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios ### Dataverse installation can be now be configured to allow out-of-band upload -- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. 
-By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### Alternative Title is made repeatable. @@ -23,7 +23,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. +Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API @@ -70,7 +70,6 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. - ### DataFile API payload has been extended to include the following fields: - tabularData: Boolean field to know if the DataFile is of tabular type - fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) @@ -114,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. 
There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -125,12 +124,13 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. 
## OpenID Connect Authentication Provider Improvements @@ -175,6 +175,8 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.subtitle - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age +- dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.guestbook-at-request ## Installation @@ -182,14 +184,17 @@ If this is a new installation, please follow our [Installation Guide](https://gu Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! -You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). ## Upgrade Instructions - Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +## Backward Incompatibilities +- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, + ## Complete List of Changes For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. From 90ff56ca979cd71f1c467ff1cfa0dfeb8f619691 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:43:43 -0500 Subject: [PATCH 357/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 4b5c20f3953..e1a9214a982 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -122,7 +122,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. 
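To make the two JSON schema endpoints described in the release note hunks above more concrete, here is a rough sketch of how they might be called. The endpoint paths come from the notes themselves; the HTTP verbs, the X-Dataverse-key header, and the shell variables are assumptions based on the usual Dataverse API conventions:

    # Fetch the dataset schema derived from the collection's required fields
    curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ALIAS/datasetSchema"

    # Validate a local dataset.json file against that schema
    curl -H "X-Dataverse-key:$API_TOKEN" -X POST -H 'Content-type: application/json' \
         --upload-file dataset.json \
         "$SERVER_URL/api/dataverses/$ALIAS/validateDatasetJson"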
From 10e0e25fe10dda9f49b6126f591b9483adb2f765 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:44:49 -0500 Subject: [PATCH 358/546] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index e1a9214a982..427a07a4c2c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -123,7 +123,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. From 3d55ed31de8fb9e45a2cedfecf07e22c82dae12a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:47:53 -0500 Subject: [PATCH 359/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 427a07a4c2c..189f21f2322 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -19,21 +19,22 @@ With the upload-out-of-band option enabled, it is also possible for file upload ### Alternative Title is made repeatable. - One will need to update database with updated citation block. `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` - Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +- One will also need to update Solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` + Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. 
+Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] -### Improvements in the /versions API +### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. -### The following API endpoints have been added: +### The following API endpoints have been added: +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - /api/files/{id}/downloadCount - /api/files/{id}/dataTables - /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. @@ -42,11 +43,10 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. - userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. -- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. 
Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -113,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -192,7 +192,7 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1be5d4b6b2baddc5f30bf598d81bd5ed991f73ee Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:52:39 -0500 Subject: [PATCH 360/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 189f21f2322..d0fe895565c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -87,10 +87,8 @@ This parameter applies a filter criteria to the operation and supports the follo ### Misc - Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. - - Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). 
The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. - - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -104,7 +102,7 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. - +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information @@ -124,7 +122,6 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
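The guestbook-at-request behavior and the out-of-band upload option referenced in the release note hunks above are both plain JVM options. A minimal sketch of enabling them on a Payara installation, assuming a store with driver id "mystore"; the driver id, asadmin path, and chosen values are illustrative, not part of the patches:

    # Show any configured guestbook when access is requested instead of at download time
    $PAYARA/bin/asadmin create-jvm-options '-Ddataverse.files.guestbook-at-request=true'
    # Allow out-of-band (API-only) upload for the store with driver id "mystore"
    $PAYARA/bin/asadmin create-jvm-options '-Ddataverse.files.mystore.upload-out-of-band=true'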
From 8e2ff826bdd0f41e598a56012fa780d5f9148a2e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:41:35 -0500 Subject: [PATCH 361/546] store tests --- .../dataaccess/GlobusOverlayAccessIOTest.java | 148 ++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index e69de29bb2d..792a9974076 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -0,0 +1,148 @@ +/* + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import java.io.IOException; +import java.nio.file.Paths; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.STRICT_STUBS) +public class GlobusOverlayAccessIOTest { + + @Mock + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + // Base Store + System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + System.setProperty("dataverse.files.base.label", "default"); + System.setProperty("dataverse.files.base.directory", "/tmp/files"); + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + // Nonsense value of the right form + System.setProperty("dataverse.files.globusm.globus-token", + "NzM2NTQxMDMtOTg1Yy00NDgzLWE1MTYtYTJlNDk0ZmI3MDhkOkpJZGZaZGxMZStQNUo3MTRIMDY2cDh6YzIrOXI2RmMrbFR6UG0zcSsycjA9"); + System.setProperty("dataverse.files.globusm.remote-store-name", "GlobusEndpoint1"); + System.setProperty("dataverse.files.globusm.type", "globus"); + System.setProperty("dataverse.files.globusm.managed", "true"); + System.setProperty("dataverse.files.globusm.base-store", "base"); + System.setProperty("dataverse.files.globusm.label", "globusManaged"); + + // Remote Store + System.setProperty("dataverse.files.globusr.type", "globus"); + System.setProperty("dataverse.files.globusr.base-store", "base"); + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty( + "dataverse.files.globusr." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.base.type"); + System.clearProperty("dataverse.files.base.label"); + System.clearProperty("dataverse.files.base.directory"); + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.globus-token"); + System.clearProperty("dataverse.files.globusm.remote-store-name"); + System.clearProperty("dataverse.files.globusm.type"); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusm.base-store"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty("dataverse.files.globusr.type"); + System.clearProperty("dataverse.files.globusr.base-store"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty("dataverse.files.globusm.label"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + System.clearProperty("dataverse.files.globusr.remote-store-name"); + } + + @Test + void testGlobusOverlayIdentifiers() throws IOException { + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusr", + rDatafile.getStorageIdentifier().replace("hdc1", ""))); + + // We can read the storageIdentifier and get the driver + assertTrue(mDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(mDatafile.getStorageIdentifier()))); + assertTrue(rDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(rDatafile.getStorageIdentifier()))); + + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globusm").equals(System.getProperty("dataverse.files.globusm.type"))); + assertTrue(DataAccess.getDriverType("globusr").equals(System.getProperty("dataverse.files.globusr.type"))); + + // When we get a StorageIO for the file, it is the right type + StorageIO mStorageIO = DataAccess.getStorageIO(mDatafile); + assertTrue(mStorageIO instanceof GlobusOverlayAccessIO); + StorageIO rStorageIO = DataAccess.getStorageIO(rDatafile); + assertTrue(rStorageIO instanceof GlobusOverlayAccessIO); + + // When we use it, we can get properties like the remote store name + assertTrue(mStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusm.remote-store-name"))); + assertTrue(rStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusr.remote-store-name"))); + + // Storage Locations are correct + String 
mLocation = mStorageIO.getStorageLocation(); + assertEquals("globusm:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + + "/" + baseStoreId1, mLocation); + String rLocation = rStorageIO.getStorageLocation(); + assertEquals("globusr://" + baseStoreId2 + "//" + logoPath, rLocation); + + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId1 + ".auxobject").toString()); + System.out.println(mStorageIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId1 + ".auxobject").equals(mStorageIO.getAuxObjectAsPath("auxobject"))); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId2 + ".auxobject").equals(rStorageIO.getAuxObjectAsPath("auxobject"))); + } +} From 865c9feb4230a0a3bc9880cb6088a563b3fe21fc Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 13:53:35 -0500 Subject: [PATCH 362/546] getConfig tests --- .../iq/dataverse/dataaccess/StorageIOTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java index 2ed9d18036d..84a241b90f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java @@ -243,4 +243,16 @@ public void testGenerateVariableHeader() { assertEquals("Random Random\n", instance.generateVariableHeader(dvs)); assertEquals(null, instance.generateVariableHeader(null)); } + + @Test + public void testGetConfigParam() { + System.setProperty("dataverse.files.globus.type", "globus"); + assertEquals("globus", StorageIO.getConfigParamForDriver("globus", StorageIO.TYPE)); + System.clearProperty("dataverse.files.globus.type"); + } + + @Test + public void testGetConfigParamWithDefault() { + assertEquals(DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER, StorageIO.getConfigParamForDriver("globus", AbstractRemoteOverlayAccessIO.BASE_STORE, DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER)); + } } From cb1beaae490126c2274219dfcb4cae56094b096a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 6 Dec 2023 14:11:15 -0500 Subject: [PATCH 363/546] finish changing minio secret key #6783 This should have been part of 811d79a7 --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e68215d53d2..5265a6b7c2d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -42,7 +42,7 @@ services: -Ddataverse.files.minio1.upload-redirect=false -Ddataverse.files.minio1.download-redirect=false -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k35 + -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) From 5b7a560a380db12d083e82a19a865eb79559e0a4 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 14:41:31 -0500 Subject: [PATCH 364/546] refactor, test for getFileMap --- .../harvard/iq/dataverse/api/Datasets.java | 3 +- .../AbstractRemoteOverlayAccessIO.java | 2 +- .../dataverse/globus/GlobusServiceBean.java | 134 +++++++++--------- 
.../iq/dataverse/globus/GlobusUtil.java | 33 +++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 1 - .../iq/dataverse/globus/GlobusUtilTest.java | 88 ++++++++++++ 6 files changed, 190 insertions(+), 71 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 939ebf1dcd4..b3bfc476423 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -110,6 +110,7 @@ import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import java.io.IOException; import java.io.InputStream; @@ -3996,7 +3997,7 @@ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @Pat } } // Allowed to download all requested files - JsonObject files = globusService.getFilesMap(dataFiles, dataset); + JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset); if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { // If managed, give the principal read permissions int status = globusService.setPermissionForDownload(dataset, body.getString("principal")); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 8d058b7c9e3..6c26502acfa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -42,7 +42,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends StorageIO { protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - protected static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; static final String BASE_STORE = "base-store"; protected static final String SECRET_KEY = "secret-key"; static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 37959188857..8cc8e491416 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -159,9 +159,11 @@ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger } } - /** Request read/write access for the specified principal and generate a list of accessible paths for new files for the specified dataset. + /** + * Request read/write access for the specified principal and generate a list of + * accessible paths for new files for the specified dataset. 
* - * @param principal - the id of the Globus principal doing the transfer + * @param principal - the id of the Globus principal doing the transfer * @param dataset * @param numberOfPaths - how many files are to be transferred * @return @@ -230,10 +232,15 @@ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissi } } - /** Given an array of remote files to be referenced in the dataset, create a set of valid storage identifiers and return a map of the remote file paths to storage identifiers. + /** + * Given an array of remote files to be referenced in the dataset, create a set + * of valid storage identifiers and return a map of the remote file paths to + * storage identifiers. * * @param dataset - * @param referencedFiles - a JSON array of remote files to be referenced in the dataset - each should be a string with the /path/to/file + * @param referencedFiles - a JSON array of remote files to be referenced in the + * dataset - each should be a string with the /path/to/file * @return - a map of supplied paths to valid storage identifiers */ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { @@ -262,15 +269,17 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref return fileMap.build(); } - - /** A cache of temporary permission requests - for upload (rw) and download (r) access. - * When a temporary permission request is created, it is added to the cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, the permission will be revoked/deleted. - * (If a transfer has been started, the permission will not be revoked/deleted until the transfer is complete. This is handled in other methods.) + /** + * A cache of temporary permission requests - for upload (rw) and download (r) + * access. When a temporary permission request is created, it is added to the + * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, + * the permission will be revoked/deleted. (If a transfer has been started, the + * permission will not be revoked/deleted until the transfer is complete. This + * is handled in other methods.) */ // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() - .expireAfterWrite( - Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { // Delete rules that expire logger.fine("Rule " + ruleId + " expired"); @@ -280,20 +289,24 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref .build(); - //Convenience method to add a temporary permission request to the cache - allows logging of temporary permission requests + // Convenience method to add a temporary permission request to the cache - + // allows logging of temporary permission requests private void monitorTemporaryPermissions(String ruleId, long datasetId) { logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); rulesCache.put(ruleId, datasetId); } -/** Call the Globus API to get info about the transfer. 
- * - * @param accessToken - * @param taskId - the Globus task id supplied by the user - * @param globusLogger - the transaction-specific logger to use (separate log files are created in general, some calls may use the class logger) - * @return - * @throws MalformedURLException - */ + /** + * Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log + * files are created in general, some calls may use the + * class logger) + * @return + * @throws MalformedURLException + */ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); @@ -313,9 +326,12 @@ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger return task; } - /** Globus call to get an access token for the user using the long-term token we hold. + /** + * Globus call to get an access token for the user using the long-term token we + * hold. * - * @param globusBasicToken - the base64 encoded Globus Basic token comprised of the : + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of + * the : * @return - a valid Globus access token */ public static AccessToken getClientToken(String globusBasicToken) { @@ -433,7 +449,6 @@ static class MakeRequestResponse { } - /** * Cache of open download Requests This cache keeps track of the set of files * selected for transfer out (download) via Globus. It is a means of @@ -480,10 +495,11 @@ public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - /** Generated the App URl for upload (in) or download (out) + /** + * Generated the App URl for upload (in) or download (out) * - * @param d - the dataset involved - * @param upload - boolean, true for upload, false for download + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download * @param dataFiles - a list of the DataFiles to be downloaded * @return */ @@ -516,7 +532,7 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List downloadDFList) { return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); - } @Asynchronous @@ -608,8 +605,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S rulesCache.invalidate(ruleId); } } - - //Wait before first check + + // Wait before first check Thread.sleep(5000); // globus task status check task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); @@ -907,8 +904,8 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - - //Transfer is done (success or failure) so delete the rule + + // Transfer is done (success or failure) so delete the rule if (ruleId != null) { logger.info("Deleting: rule: " + ruleId); deletePermission(ruleId, dataset, globusLogger); @@ -1150,13 +1147,14 @@ private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { return endpoint; } - + // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. 
We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. - public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, boolean doNotSaveGuestbookResponse) { + public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, + boolean doNotSaveGuestbookResponse) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); @@ -1170,7 +1168,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, apiToken = new ApiToken(); apiToken.setTokenString(privUrl.getToken()); } - + DataFile df = guestbookResponse.getDataFile(); if (df != null) { logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); @@ -1179,35 +1177,35 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, if (!doNotSaveGuestbookResponse) { fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); } - PrimeFaces.current() - .executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); } else { - //Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + // Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); List selectedFiles = new ArrayList(); for (String idAsString : list) { try { Long fileId = Long.parseLong(idAsString); - // If we need to create a GuestBookResponse record, we have to - // look up the DataFile object for this file: - if (!doNotSaveGuestbookResponse) { - df = dataFileService.findCheapAndEasy(fileId); - guestbookResponse.setDataFile(df); - fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); - selectedFiles.add(df); - } + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + if (!doNotSaveGuestbookResponse) { + df = dataFileService.findCheapAndEasy(fileId); + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + selectedFiles.add(df); + } } catch (NumberFormatException nfe) { - logger.warning("A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + idAsString); + logger.warning( + "A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + + idAsString); return; } } if (!selectedFiles.isEmpty()) { - //Use dataset from one file - files should all be from the same dataset - PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, - selectedFiles)); + // Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, selectedFiles)); } } - } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java new file mode 100644 index 00000000000..92cf8ac7704 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.List; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; 
+import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +public class GlobusUtil { + + public static JsonObject getFilesMap(List dataFiles, Dataset d) { + JsonObjectBuilder filesBuilder = Json.createObjectBuilder(); + for (DataFile df : dataFiles) { + String storageId = df.getStorageIdentifier(); + String[] parts = DataAccess + .getDriverIdAndStorageLocation(DataAccess.getLocationFromStorageId(storageId, d)); + String driverId = parts[0]; + String fileLocation = parts[1]; + if (GlobusAccessibleStore.isDataverseManaged(driverId)) { + String endpointWithBasePath = GlobusAccessibleStore.getTransferEnpointWithPath(driverId); + fileLocation = endpointWithBasePath + "/" + fileLocation; + } else { + fileLocation = storageId.substring(storageId.lastIndexOf("//") + 2); + } + filesBuilder.add(df.getId().toString(), fileLocation); + } + return filesBuilder.build(); + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 792a9974076..856d71d7dc0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -6,7 +6,6 @@ import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; import org.junit.jupiter.api.AfterEach; diff --git a/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java new file mode 100644 index 00000000000..56f8731b9c8 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java @@ -0,0 +1,88 @@ +package edu.harvard.iq.dataverse.globus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.DOIServiceBean; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonObject; + +public class GlobusUtilTest { + + private Dataset dataset; + private DataFile mDatafile; + private DataFile rDatafile; + private String baseStoreId1 = "182ad2bda2f-c3508e719076"; + private String baseStoreId2 = "182ad2bda2f-c3508e719077"; + private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc"; + private String authority = "10.5072"; + private String identifier = "F2ABCDEF"; + + @BeforeEach + public void setUp() { + + // Managed Globus Store + + // Nonsense endpoint/paths + System.setProperty("dataverse.files.globusm." 
+ GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + System.setProperty("dataverse.files.globusm.managed", "true"); + + // Remote Store + System.setProperty("dataverse.files.globusr.managed", "false"); + System.setProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, + "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + List files = new ArrayList(); + files.add(mDatafile); + files.add(rDatafile); + dataset.setFiles(files); + } + + @AfterEach + public void tearDown() { + System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + System.clearProperty("dataverse.files.globusm.managed"); + System.clearProperty("dataverse.files.globusr.managed"); + System.clearProperty( + "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + } + + + @Test + public void testgetFilesMap() { + + JsonObject jo = GlobusUtil.getFilesMap(dataset.getFiles(), dataset); + System.out.println(JsonUtil.prettyPrint(jo)); + assertEquals(jo.getString(Long.toString(mDatafile.getId())), "d7c42580-6538-4605-9ad8-116a61982644/hdc1/10.5072/F2ABCDEF/182ad2bda2f-c3508e719076"); + assertEquals(jo.getString(Long.toString(rDatafile.getId())), logoPath); + } +} From 4ba629d643678acdd0b649128b8a76a805ee6906 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 15:28:32 -0500 Subject: [PATCH 365/546] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index d0fe895565c..38b99e6580b 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -11,6 +11,10 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios +### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started +Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). + The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + ### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). 
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -142,7 +146,7 @@ life easier during instance setups and reconfiguration. You no longer need to ge necessary JSON file. ### Adding PKCE Support - +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) From 93d9b35a07625622523a4490eee8f55d617defec Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:17 -0500 Subject: [PATCH 366/546] future test code - requires config of Globus stores --- .../harvard/iq/dataverse/api/DatasetsIT.java | 53 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 32 +++++++++++ .../dataaccess/GlobusOverlayAccessIOTest.java | 34 ++++++------ 3 files changed, 104 insertions(+), 15 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6a746b7c5b5..928574eb82b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -42,6 +42,9 @@ import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import org.apache.commons.lang3.StringUtils; @@ -135,6 +138,7 @@ public static void setUpClass() { .statusCode(200); */ } + @AfterAll public static void afterClass() { @@ -4175,4 +4179,53 @@ public void testGetUserPermissionsOnDataset() { Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken); getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); } + + //Requires that a Globus remote store be set up as with the parameters in the GlobusOverlayAccessIOTest class + //Tests whether the API call succeeds and has some of the expected parameters + @Test + @Disabled + public void testGetGlobusUploadParameters() { + //Creates managed and remote Globus stores + GlobusOverlayAccessIOTest.setUp(); + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + 
createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); + + Response setDriver = UtilIT.setDatasetStorageDriver(datasetId, System.getProperty("dataverse.files.globusr.label"), apiToken); + assertEquals(200, setDriver.getStatusCode()); + + Response getUploadParams = UtilIT.getDatasetGlobusUploadParameters(datasetId, "en_us", apiToken); + assertEquals(200, getUploadParams.getStatusCode()); + JsonObject data = JsonUtil.getJsonObject(getUploadParams.getBody().asString()); + JsonObject queryParams = data.getJsonObject("queryParameters"); + assertEquals("en_us", queryParams.getString("dvLocale")); + assertEquals("false", queryParams.getString("managed")); + //Assumes only one reference endpoint with a basepath is configured + assertTrue(queryParams.getJsonArray("referenceEndpointsWithPaths").get(0).toString().indexOf(System.getProperty("dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS)) > -1); + JsonArray signedUrls = data.getJsonArray("signedUrls"); + boolean found = false; + for (int i = 0; i < signedUrls.size(); i++) { + JsonObject signedUrl = signedUrls.getJsonObject(i); + if (signedUrl.getString("name").equals("requestGlobusReferencePaths")) { + found=true; + break; + } + } + assertTrue(found); + //Removes managed and remote Globus stores + GlobusOverlayAccessIOTest.tearDown(); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 869e755a183..bd2fe7e6f0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3636,4 +3636,36 @@ static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); } + static Response setDatasetStorageDriver(Integer datasetId, String driverLabel, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(driverLabel) + .put("/api/datasets/" + datasetId + "/storageDriver"); + } + + + //Globus Store related - not currently used + + static Response getDatasetGlobusUploadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusUploadParameters?locale=" + locale); + } + + static Response getDatasetGlobusDownloadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusDownloadParameters?locale=" + locale); + } + + static Response requestGlobusDownload(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 856d71d7dc0..1c84fa90a9e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -8,8 +8,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.mocks.MocksFactory; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import static org.junit.jupiter.api.Assertions.*; @@ -35,8 +36,8 @@ public class GlobusOverlayAccessIOTest { private String authority = "10.5072"; private String identifier = "F2ABCDEF"; - @BeforeEach - public void setUp() { + @BeforeAll + public static void setUp() { // Base Store System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); System.setProperty("dataverse.files.base.label", "default"); @@ -65,20 +66,11 @@ public void setUp() { "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp"); - dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", - DOIServiceBean.DOI_RESOLVER_URL, null)); - mDatafile = MocksFactory.makeDataFile(); - mDatafile.setOwner(dataset); - mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); - rDatafile = MocksFactory.makeDataFile(); - rDatafile.setOwner(dataset); - rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); } - @AfterEach - public void tearDown() { + @AfterAll + public static void tearDown() { System.clearProperty("dataverse.files.base.type"); System.clearProperty("dataverse.files.base.label"); System.clearProperty("dataverse.files.base.directory"); @@ -100,6 +92,18 @@ public void tearDown() { @Test void testGlobusOverlayIdentifiers() throws IOException { + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", + DOIServiceBean.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); From 12b7c306dd31ebd987a2bae5f36dae27e4f0ba56 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:32:24 -0500 Subject: [PATCH 367/546] typo --- .../iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java index 1c84fa90a9e..ad980aa28cd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -61,7 +61,7 @@ public static void setUp() { System.setProperty("dataverse.files.globusr.type", "globus"); 
System.setProperty("dataverse.files.globusr.base-store", "base"); System.setProperty("dataverse.files.globusr.managed", "false"); - System.setProperty("dataverse.files.globusm.label", "globusRemote"); + System.setProperty("dataverse.files.globusr.label", "globusRemote"); System.setProperty( "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS, "d7c42580-6538-4605-9ad8-116a61982644/hdc1"); From 1426dfb6fc52ace869e3c822a732d5b408ca7c4c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 6 Dec 2023 16:47:54 -0500 Subject: [PATCH 368/546] add missing setting to release notes, add a todo to use two delays --- doc/release-notes/10162-globus-support.md | 7 ++++++- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index d64e72b70a1..7bc3990f840 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,4 +1,6 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, +and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) - Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. - The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) @@ -10,5 +12,8 @@ Backward Incompatibilities: New JVM Options: - A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. 
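For a rough picture of how this option is consumed, the permission-rule cache touched earlier in this series reads the value through the JvmSettings enum and hands it to Caffeine as an expire-after-write duration. The sketch below mirrors that pattern; it assumes the Dataverse classpath (JvmSettings, Caffeine) and is illustrative rather than the exact production code:

```java
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.logging.Logger;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.RemovalCause;
import com.github.benmanes.caffeine.cache.Scheduler;

import edu.harvard.iq.dataverse.settings.JvmSettings;

public class GlobusRuleCacheSketch {

    private static final Logger logger = Logger.getLogger(GlobusRuleCacheSketch.class.getName());

    // Sketch: temporary Globus permission rules keyed by rule id, mapped to dataset id.
    // Entries expire after dataverse.files.globus-cache-maxage minutes; at that point a
    // real implementation would revoke/delete the corresponding Globus permission.
    private final Cache<String, Long> rulesCache = Caffeine.newBuilder()
            .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES))
            .scheduler(Scheduler.systemScheduler())
            .evictionListener((String ruleId, Long datasetId, RemovalCause cause) ->
                    logger.fine("Rule " + ruleId + " for dataset " + datasetId + " expired: " + cause))
            .build();
}
```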
+ + Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 8cc8e491416..d0660a55a6a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -277,6 +277,8 @@ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray ref * permission will not be revoked/deleted until the transfer is complete. This * is handled in other methods.) */ + // ToDo - nominally this doesn't need to be as long as the allowed time for the + // downloadCache so there could be two separate settings. // Single cache of open rules/permission requests private final Cache rulesCache = Caffeine.newBuilder() .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) From d2427bd39046f104c95e27d1869d1665b969724f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 09:49:52 -0500 Subject: [PATCH 369/546] #10151 incorporate recent additions --- doc/release-notes/6.1-release-notes.md | 22 +++++++++++++++++++++ doc/release-notes/8549-collection-quotas.md | 3 --- doc/release-notes/8760-bagit.md | 15 -------------- 3 files changed, 22 insertions(+), 18 deletions(-) delete mode 100644 doc/release-notes/8549-collection-quotas.md delete mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38b99e6580b..38a7a1064e6 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -30,6 +30,28 @@ With the upload-out-of-band option enabled, it is also possible for file upload Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +### Collection Storage Size Quota Support +-This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 + +### BagIT Export Configurations Updated +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. 
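Because these are MPCONFIG options, they can be resolved through the standard MicroProfile Config API. The snippet below is a hypothetical sketch, not the actual BagIt export code; the property keys come from the list above, and the fallback strings are invented placeholders:

```java
import org.eclipse.microprofile.config.Config;
import org.eclipse.microprofile.config.ConfigProvider;

public class BagInfoConfigSketch {

    // Sketch: resolve the bag-info.txt source-organization fields from MPCONFIG,
    // falling back to placeholder values when the options are not set.
    public static String[] resolveSourceOrg() {
        Config config = ConfigProvider.getConfig();
        String name = config.getOptionalValue("dataverse.bagit.sourceorg.name", String.class)
                .orElse("Example Organization");   // placeholder fallback
        String address = config.getOptionalValue("dataverse.bagit.sourceorg.address", String.class)
                .orElse("123 Example Street");     // placeholder fallback
        String email = config.getOptionalValue("dataverse.bagit.sourceorg.email", String.class)
                .orElse("support@example.org");    // placeholder fallback
        return new String[] { name, address, email };
    }
}
```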
+ +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt + + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md deleted file mode 100644 index b3635d0c5a1..00000000000 --- a/doc/release-notes/8549-collection-quotas.md +++ /dev/null @@ -1,3 +0,0 @@ -This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 - diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md deleted file mode 100644 index 30601857309..00000000000 --- a/doc/release-notes/8760-bagit.md +++ /dev/null @@ -1,15 +0,0 @@ -For BagIT export, it is now possible to configure the following information in bag-info.txt... - -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu - -... using new JVM/MPCONFIG options: - -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email - -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. - -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt From 05c53066ea26c809b6376051ff336f11a4bcee9d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:29:47 -0500 Subject: [PATCH 370/546] mention download tmp file API #10151 --- doc/release-notes/6.1-release-notes.md | 1 + doc/release-notes/8760-download-tmp-file.md | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..1b4e884cded 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -73,6 +73,7 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. 
+- downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md deleted file mode 100644 index 7623a91ac9a..00000000000 --- a/doc/release-notes/8760-download-tmp-file.md +++ /dev/null @@ -1,3 +0,0 @@ -A new API has been added for testing purposes that allows files to be downloaded from /tmp. - -See From 97c33218fa7224c544657e72f52c27d9cd8951bf Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:30:23 -0500 Subject: [PATCH 371/546] remove duplicate "new" heading in API changelog #10151 --- doc/sphinx-guides/source/api/changelog.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d2908533a14..910134e14f3 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -12,9 +12,6 @@ New ~~~ - **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. - **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. - -New -~~~ - **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. - **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. From 3a13ac8c56385ed2cc82bcc9db4f57fea7688a67 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:34:21 -0500 Subject: [PATCH 372/546] #10151 add upgrade instructions --- doc/release-notes/6.1-release-notes.md | 81 +++++++++++++++++++ .../9002_allow_direct_upload_setting.md | 5 -- 2 files changed, 81 insertions(+), 5 deletions(-) delete mode 100644 doc/release-notes/9002_allow_direct_upload_setting.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..d5972338124 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,6 +51,13 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +### Direct Upload setting added +A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. + +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
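As a reminder of what that default path looks like in practice, here is a rough RestAssured-style sketch in the spirit of the UtilIT helpers elsewhere in this series. The server URL, API token, dataset id, and jsonData payload are placeholders, and the call shown is the long-standing native add-file endpoint rather than anything introduced by these patches:

```java
import static io.restassured.RestAssured.given;

import java.io.File;

import io.restassured.response.Response;

public class AddFileSketch {

    // Sketch: add a local file to a dataset via the native API linked above.
    // serverUrl, apiToken, datasetId, and descriptionJson are caller-supplied placeholders.
    public static Response addFileToDataset(String serverUrl, String apiToken, int datasetId,
                                            String pathToFile, String descriptionJson) {
        return given()
                .header("X-Dataverse-key", apiToken)
                .multiPart("file", new File(pathToFile))
                .multiPart("jsonData", descriptionJson)
                .post(serverUrl + "/api/datasets/" + datasetId + "/add");
    }
}
```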
+ +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -138,6 +145,7 @@ to generate updated versions. See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -214,6 +222,79 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +0\. These instructions assume that you are upgrading from 6.0. If you are running an earlier version, the only safe way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to 5.14. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara6` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin undeploy dataverse-6.0` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-6.1.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. 
Update Geospatial Metadata Block (to improve validation of bounding box values) + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + +6a\. Update Citation Metadata Block (to make Alternative Title repeatable) + +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` + +7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +7a\. For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + +- Start Solr instance (usually `service solr start`, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- There are 2 ways to regenerate the schema: Either by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed): +``` + wget https://raw.githubusercontent.com/IQSS/dataverse/master/conf/solr/9.3.0/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml +``` +OR, alternatively, you can edit the following line in your schema.xml by hand as follows (to indicate that alternative title is now `multiValued="true"`): +``` + +``` + +- Restart Solr instance (usually `service solr restart` depending on solr/OS) + +8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). + + ## Backward Incompatibilities - Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, diff --git a/doc/release-notes/9002_allow_direct_upload_setting.md b/doc/release-notes/9002_allow_direct_upload_setting.md deleted file mode 100644 index 1e76ed4ad47..00000000000 --- a/doc/release-notes/9002_allow_direct_upload_setting.md +++ /dev/null @@ -1,5 +0,0 @@ -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
- -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. From a78213633e6f5bf345d1aedf4328eee5ee231ffb Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:43:40 -0500 Subject: [PATCH 373/546] #10151 remove notes previously incorporated --- .../9547-validation-for-geospatial-metadata.md | 9 --------- doc/release-notes/9859-ORE and Bag updates.md | 14 -------------- 2 files changed, 23 deletions(-) delete mode 100644 doc/release-notes/9547-validation-for-geospatial-metadata.md delete mode 100644 doc/release-notes/9859-ORE and Bag updates.md diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md deleted file mode 100644 index a44e1a3732b..00000000000 --- a/doc/release-notes/9547-validation-for-geospatial-metadata.md +++ /dev/null @@ -1,9 +0,0 @@ -Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - -For the "upgrade" steps section: - -Update Geospatial Metadata Block - -- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` - diff --git a/doc/release-notes/9859-ORE and Bag updates.md b/doc/release-notes/9859-ORE and Bag updates.md deleted file mode 100644 index dd3ae3bbbe1..00000000000 --- a/doc/release-notes/9859-ORE and Bag updates.md +++ /dev/null @@ -1,14 +0,0 @@ -Dataverse's OAI_ORE Metadata Export format and archival BagIT exports -(which include the OAI-ORE metadata export file) have been updated to include -information about the dataset version state, e.g. RELEASED or DEACCESSIONED -and to indicate which version of Dataverse was used to create the archival Bag. -As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 -version designation and it is expected that any future changes to the OAI_ORE export -format will result in a version change and that tools such as DVUploader that can -recreate datasets from archival Bags will start indicating which version(s) of the -OAI_ORE format they can read. - -Dataverse installations that have been using archival Bags may wish to update any -existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) -to generate updated versions. 
\ No newline at end of file From b517f6e0fca1802faa4455522a72e711963714ba Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:53:07 -0500 Subject: [PATCH 374/546] #10151 S3 test notes --- doc/release-notes/6.1-release-notes.md | 2 ++ doc/release-notes/6783-s3-tests.md | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 doc/release-notes/6783-s3-tests.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 9a35a31a734..375717ab9c9 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -163,6 +163,8 @@ For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.ht - `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. ## OpenID Connect Authentication Provider Improvements diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md deleted file mode 100644 index 1b9bb400cc6..00000000000 --- a/doc/release-notes/6783-s3-tests.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. - -In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. From 07a8659b60acdb766fb5a4742cf4ac4537e34615 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 14:24:24 -0500 Subject: [PATCH 375/546] #10151 remove duplicate release note out of band setting previously added --- doc/release-notes/6.1-release-notes.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 375717ab9c9..b6bb7d8b806 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,14 +51,6 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt -### Direct Upload setting added -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. 
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). - -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. - - ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output From ed5b0dbde90fd4b8592aa2bdce7ae205482063c8 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Dec 2023 15:44:18 -0500 Subject: [PATCH 376/546] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/release-notes/10162-globus-support.md | 2 +- doc/sphinx-guides/source/developers/big-data-support.rst | 4 ++-- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md index 7bc3990f840..60670b5b101 100644 --- a/doc/release-notes/10162-globus-support.md +++ b/doc/release-notes/10162-globus-support.md @@ -1,7 +1,7 @@ Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incomatibilities. +- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. 
- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model - Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) - The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index fe49f9f6150..8d891e63317 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -149,7 +149,7 @@ Globus File Transfer Note: Globus file transfer is still experimental but feedback is welcome! See :ref:`support`. -Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) and a community-developed `dataverse-globus `_ app has been properly installed and configured. Globus endpoints can be in a variety of places, from data centers to personal computers. @@ -168,7 +168,7 @@ Dataverse-managed endpoints must be Globus 'guest collections' hosted on either S3 connector which requires a paid Globus subscription at the host institution). In either case, Dataverse is configured with the Globus credentials of a user account that can manage the endpoint. Users will need a Globus account, which can be obtained via their institution or directly from Globus (at no cost). -With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionlity related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionlity normally associated with direct uploads to S3 is available.) +With the file-system endpoint, Dataverse does not currently have access to the file contents. Thus, functionality related to ingest, previews, fixity hash validation, etc. are not available. (Using the S3-based endpoint, Dataverse has access via S3 and all functionality normally associated with direct uploads to S3 is available.) For the reference use case, Dataverse must be configured with a list of allowed endpoint/base paths from which files may be referenced. In this case, since Dataverse is not accessing the remote endpoint itself, it does not need Globus credentials. Users will need a Globus account in this case, and the remote endpoint must be configured to allow them access (i.e. be publicly readable, or potentially involving some out-of-band mechanism to request access (that could be described in the dataset's Terms of Use and Access). 
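For a concrete picture of that configuration, the unit tests in this series stand up a non-managed, reference-style Globus store purely through system properties. The trimmed sketch below copies the fixture values from GlobusOverlayAccessIOTest; the endpoint UUID and base path are nonsense placeholders rather than a usable Globus collection, and a running installation would set the equivalent JVM options through its normal configuration mechanisms instead:

```java
import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO;

public class RemoteGlobusStoreFixtureSketch {

    // Sketch: a reference-only (non-managed) Globus store, configured the way the
    // GlobusOverlayAccessIOTest fixture does it. Assumes a separate "base" store
    // (dataverse.files.base.*) has already been configured, as in the tests.
    public static void configureRemoteGlobusStore() {
        System.setProperty("dataverse.files.globusr.type", "globus");
        System.setProperty("dataverse.files.globusr.label", "globusRemote");
        System.setProperty("dataverse.files.globusr.base-store", "base");
        System.setProperty("dataverse.files.globusr.managed", "false");
        System.setProperty(
                "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS,
                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
        System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp");
    }
}
```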
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 5b2b6982866..37d80d0a6cd 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -71,7 +71,7 @@ The getDatasetMetadata and getFileListing URLs are just signed versions of the s If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are -describe further below. +described further below. The call to set up for a transfer out (download) is similar: From 1d668970df1562c3cbc85d60be2abc55d8a96572 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 15:56:27 -0500 Subject: [PATCH 377/546] #10151 standard guide links --- doc/release-notes/6.1-release-notes.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b6bb7d8b806..24194a02026 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -49,14 +49,14 @@ Organization-Email: support@dataverse.harvard.edu Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. ### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). @@ -128,13 +128,13 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. 
-- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) From 85206de08acb6a8373199fb0d4eec2768cb6763d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Dec 2023 15:59:21 -0500 Subject: [PATCH 378/546] simply API changelog to be about breaking changes only #10151 --- doc/release-notes/6.1-release-notes.md | 2 +- doc/sphinx-guides/source/api/changelog.rst | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 24194a02026..a3b04749d68 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -134,7 +134,7 @@ to generate updated versions. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html +- We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. 
The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 910134e14f3..20225b99b5c 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -1,5 +1,7 @@ -API Changelog -============= +API Changelog (Breaking Changes) +================================ + +This API changelog is experimental and we would love feedback on its usefulness. Its primary purpose is to inform API developers of any breaking changes. (We try not ship any backward incompatible changes, but it happens.) To see a list of new APIs and backward-compatible changes to existing API, please see each version's release notes at https://github.com/IQSS/dataverse/releases .. contents:: |toctitle| :local: @@ -8,20 +10,9 @@ API Changelog v6.1 ---- -New -~~~ -- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. -- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. -- **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. -- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. - -Changes -~~~~~~~ -- **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. +- The metadata field "Alternative Title" now supports multiple values so you must pass an array rather than a string when populating that field via API. See https://github.com/IQSS/dataverse/pull/9440 v6.0 ---- -Changes -~~~~~~~ - **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response). Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. See :doc:`dataaccess`. From 0cd87d167211ee6bc047de3cba3e79acfb520e28 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 8 Dec 2023 16:37:03 -0500 Subject: [PATCH 379/546] address Review comments --- .../source/admin/integrations.rst | 12 ++++++++ doc/sphinx-guides/source/api/intro.rst | 4 +++ .../source/developers/globus-api.rst | 6 ++-- .../source/installation/config.rst | 9 +++--- .../edu/harvard/iq/dataverse/DatasetPage.java | 29 ------------------- .../AbstractRemoteOverlayAccessIO.java | 2 +- 6 files changed, 25 insertions(+), 37 deletions(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index 9a24cf0715c..db566106b49 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -121,6 +121,18 @@ Its goal is to make the dashboard adjustable for a Dataverse installation's need The integrations dashboard is currently in development. 
A preview and more information can be found at: `rdm-integration GitHub repository `_ +Globus +++++++ + +Globus transfer uses an efficient transfer mechanism and has additional features that make it suitable for large files and large numbers of files: + +* robust file transfer capable of restarting after network or endpoint failures +* third-party transfer, which enables a user accessing a Dataverse installation in their desktop browser to initiate transfer of their files from a remote endpoint (i.e. on a local high-performance computing cluster), directly to an S3 store managed by the Dataverse installation + +Users can transfer files via `Globus `_ into and out of datasets, or reference files on a remote Globus endpoint, when their Dataverse installation is configured to use a Globus accessible store(s) +and a community-developed `dataverse-globus `_ app has been properly installed and configured. + + Embedding Data on Websites -------------------------- diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 6c61bb8c20d..8eb11798dd7 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -187,6 +187,10 @@ Lists of Dataverse APIs - Files - etc. +- :doc:`/developers/dataset-semantic-metadata-api`: For creating, reading, editing, and deleting dataset metadata using JSON-LD. +- :doc:`/developers/dataset-migration-api`: For migrating datasets from other repositories while retaining the original persistent identifiers and publication date. +- :doc:`/developers/s3-direct-upload-api`: For the transfer of larger files/larger numbers of files directly to an S3 bucket managed by Dataverse. +- :doc:`/developers/globus-api`: For the Globus transfer of larger files/larger numbers of files directly via Globus endpoints managed by Dataverse or referencing files in remote endpoints. - :doc:`metrics`: For query statistics about usage of a Dataverse installation. - :doc:`sword`: For depositing data using a standards-based approach rather than the :doc:`native-api`. 
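Complementing the remote sketch above, and as background for the integrations.rst text about configuring "a Globus accessible store", a managed Globus store is configured with the Globus credentials of the managing account and the managed endpoint. This is a hedged sketch only: all values are placeholders, `globus-token` and `files-not-accessible-by-dataverse` appear in the store options table later in this series, and the `transfer-endpoint-with-basepath` key is inferred from the `TRANSFER_ENDPOINT_WITH_BASEPATH` constant used in `GlobusAccessibleStore.java`, so verify it against the published table.

.. code-block:: bash

    # Sketch only: a managed Globus store (file-based guest collection)
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.type=globus"
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.label=GlobusManaged"
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.base-store=file"
    # base64-encoded clientid:secret; the docs recommend a MicroProfile alias rather than a literal value
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.globus-token=BASE64_ENCODED_CREDENTIALS"
    # managed endpoint id and optional base path (option name inferred, see note above)
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.transfer-endpoint-with-basepath=d8c42580-6528-4605-9ad8-116a61982644/dataverse"
    # true for file-based managed endpoints, false for S3 Connector-based ones, per the options table
    ./asadmin create-jvm-options "-Ddataverse.files.globusm.files-not-accessible-by-dataverse=true"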
diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 37d80d0a6cd..de9df06a798 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -160,11 +160,11 @@ In the managed case, once a Globus transfer has been initiated a final API call export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx export SERVER_URL=https://demo.dataverse.org export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV - export JSON_DATA="{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ + export JSON_DATA='{"taskIdentifier":"3f530302-6c48-11ee-8428-378be0d9c521", \ "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ - {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}" + {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA"" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 4540219fc7c..f6c05a3bde8 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -499,8 +499,8 @@ Logging & Slow Performance .. _file-storage: -File Storage: Using a Local Filesystem and/or Swift and/or Object Stores and/or Trusted Remote Stores and/or Globus Stores --------------------------------------------------------------------------------------------------------------------------- +File Storage +------------ By default, a Dataverse installation stores all data files (files uploaded by end users) on the filesystem at ``/usr/local/payara6/glassfish/domains/domain1/files``. This path can vary based on answers you gave to the installer (see the :ref:`dataverse-installer` section of the Installation Guide) or afterward by reconfiguring the ``dataverse.files.\.directory`` JVM option described below. @@ -999,7 +999,8 @@ See :doc:`/developers/big-data-support` for additional information on how to use In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. 
-There are two types of Globus stores +There are two types of Globus stores: + - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse - remote - where Dataverse references files that remain on trusted remote Globus endpoints @@ -1024,7 +1025,7 @@ Once you have configured a globus store, it is recommended that you install the dataverse.files..globus-token A Globus token (base64 endcoded : for a managed store) - using a microprofile alias is recommended (none) dataverse.files..reference-endpoints-with-basepaths A comma separated list of *remote* trusted Globus endpoint id/s (none) - dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be true for S3 Connector-based *managed* stores ``false`` + dataverse.files..files-not-accessible-by-dataverse ``true``/``false`` Should be false for S3 Connector-based *managed* stores, true for others ``false`` ======================================================= ================== ========================================================================== =================== diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 9c7d599ba33..b79f387f20b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -5312,36 +5312,7 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ } return false; } -/* These appear to be unused - toDo - delete - private Boolean downloadButtonAllEnabled = null; - public boolean isDownloadAllButtonEnabled() { - - if (downloadButtonAllEnabled == null) { - for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (!this.fileDownloadHelper.canDownloadFile(fmd)) { - downloadButtonAllEnabled = false; - break; - } - } - downloadButtonAllEnabled = true; - } - return downloadButtonAllEnabled; - } - - public boolean isDownloadSelectedButtonEnabled(){ - - if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){ - return false; - } - for (FileMetadata fmd : this.selectedFiles){ - if (this.fileDownloadHelper.canDownloadFile(fmd)){ - return true; - } - } - return false; - } -*/ public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java index 6c26502acfa..10ff68a56f3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -50,7 +50,7 @@ public abstract class AbstractRemoteOverlayAccessIO extends protected static final String REMOTE_STORE_URL = "remote-store-url"; // Whether Dataverse can access the file bytes - //Currently True for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; protected StorageIO baseStore = null; From 9dd3f9785c6a5c8939bd9f023400f5f10c3ef58d Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 11 Dec 2023 
09:28:16 +0000 Subject: [PATCH 380/546] Added: release notes for #10155 --- .../10155-datasets-can-download-at-least-one-file.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 doc/release-notes/10155-datasets-can-download-at-least-one-file.md diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md new file mode 100644 index 00000000000..566d505f7ca --- /dev/null +++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md @@ -0,0 +1,3 @@ +The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created. + +This endpoint reports whether the calling user can download at least one file of a particular dataset version. From 9fb44d3d45080a2e5c9de15ab0445cc052c956b3 Mon Sep 17 00:00:00 2001 From: GPortas Date: Mon, 11 Dec 2023 09:33:56 +0000 Subject: [PATCH 381/546] Added: docs for #10155 --- doc/sphinx-guides/source/api/native-api.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 56190dd342c..99438520120 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2686,6 +2686,19 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" +Know if a User can download at least one File from a Dataset Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This API call reports whether the calling user can download at least one file of a dataset version. + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export ID=24 + export VERSION=1.0 + + curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/versions/$VERSION/canDownloadAtLeastOneFile" + Files ----- From ca706662cd9f19b36d31530cf2747d810923ca3e Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 11 Dec 2023 11:06:36 -0500 Subject: [PATCH 382/546] bug fix - allowing S3 w/Globus config to work for download --- .../iq/dataverse/dataaccess/GlobusAccessibleStore.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java index e4d062f0619..8bed60d8302 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -65,7 +65,11 @@ public static String getGlobusToken(String storeId) { } public static boolean isGlobusAccessible(String storeId) { - if(StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE).equals(DataAccess.GLOBUS)) { + String type = StorageIO.getConfigParamForDriver(storeId, StorageIO.TYPE); + if (type.equals(DataAccess.GLOBUS)) { + return true; + } else if (type.equals(DataAccess.S3) + && StorageIO.getConfigParamForDriver(storeId, TRANSFER_ENDPOINT_WITH_BASEPATH) != null) { + return true; + } return false; From 09a227b30a2b5da05829297a9173952596e2df9c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 11 Dec 2023 11:12:04 -0500 Subject: [PATCH 383/546] Change docs to make clear that an S3 store can be used --- doc/sphinx-guides/source/installation/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f6c05a3bde8..a7d7905ca4a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1004,10 +1004,10 @@ There are two types of Globus stores: - managed - where Dataverse manages the Globus endpoint, deciding where transferred files are stored and managing access control for users transferring files to/from Dataverse - remote - where Dataverse references files that remain on trusted remote Globus endpoints -For managed stores, there are two variants, connecting to standard/file-based Globus endpoints and to endpoints using an underlying S3 store via the Globus S3 Connector. +A managed Globus store connects to a standard/file-based Globus endpoint. It is also possible to configure an S3 store as a managed store, if the managed endpoint uses an underlying S3 store via the Globus S3 Connector. With the former, Dataverse has no direct access to the file contents and functionality related to ingest, fixity hash validation, etc. are not available. With the latter, Dataverse can access files internally via S3 and the functionality supported is similar to that when using S3 direct upload. -Once you have configured a globus store, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initated via the Dataverse user interface. Alternately, you can point your users to the :doc:`/developers/globus-api` for information about API support. +Once you have configured a globus store, or configured an S3 store for Globus access, it is recommended that you install the `dataverse-globus app <https://github.com/scholarsportal/dataverse-globus>`_ to allow transfers in/out of Dataverse to be initiated via the Dataverse user interface. Alternatively, you can point your users to the :doc:`/developers/globus-api` for information about API support. .. table:: :align: left From 44bd5b7fb6d697d356d857a73847e1637aaa5763 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:19:46 -0500 Subject: [PATCH 384/546] add perf test results --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index a3b04749d68..b03a7a62baa 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -258,7 +258,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -298,6 +298,10 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub.
+## Performance Testing Results +The results of performance testing can be found here: +https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 + ## Getting Help For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 173b8a7a067b392de8e1c900c3e1d9eb806c71d6 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:25:44 -0500 Subject: [PATCH 385/546] fix backward comp Alternative Title --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b03a7a62baa..5bc0df4640c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -291,7 +291,7 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1959f2ff22d9bbc4290a586fc49f1f49eccdbd04 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:29:24 -0500 Subject: [PATCH 386/546] removed unneeded header --- doc/release-notes/6.1-release-notes.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 5bc0df4640c..6d3d1912f81 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -6,11 +6,8 @@ This release brings new features, enhancements, and bug fixes to the Dataverse s Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. ## Release Highlights (Major Upgrades, Breaking Changes) - This release contains major upgrades to core components. Detailed upgrade instructions can be found below. -## Detailed Release Highlights, New Features and Use Case Scenarios - ### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
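Because the guestbook-at-request option comes up repeatedly in these notes, a one-line sketch of enabling the global default may be useful; this assumes the standard JVM-option mechanism, and the collection- and dataset-level overrides described above are still set separately via the UI or API.

.. code-block:: bash

    # Sketch only: show guestbooks when access is requested instead of at download time
    ./asadmin create-jvm-options "-Ddataverse.files.guestbook-at-request=true"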
From a4e25e17155896ae5c335ea8169229f248eaf22b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 12:15:56 -0500 Subject: [PATCH 387/546] reorg 6.1 release notes, add globus #10151 --- doc/release-notes/6.1-release-notes.md | 262 +++++++++++++------------ 1 file changed, 137 insertions(+), 125 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 6d3d1912f81..475d4fc0887 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -5,57 +5,96 @@ Please note: To read these instructions in full, please go to https://github.com This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. -## Release Highlights (Major Upgrades, Breaking Changes) -This release contains major upgrades to core components. Detailed upgrade instructions can be found below. +## Release highlights -### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). - The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +### Guestbook at request + +Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` option) to display any configured guestbook to users when they request restricted files (new functionality) or when they download files (previous behavior). + +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. + +### Collection-level storage quotas + +This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. + +### Globus support + +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. 
Please note: + +- Globus functionality remains experimental/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model. +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus). +- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM options: +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used + +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) + +### Alternative Title now allows multiple values + +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. + +### External tools: configure tools now available at the dataset level + +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +### S3 out-of-band upload -### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). 
With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. -### Alternative Title is made repeatable. -- One will need to update database with updated citation block. - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update Solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` - Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +### JSON Schema for datasets -Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -### Collection Storage Size Quota Support --This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 +### OpenID Connect authentication provider improvements -### BagIT Export Configurations Updated -For BagIT export, it is now possible to configure the following information in bag-info.txt... +#### Using MicroProfile Config for provisioning -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. -... 
using new JVM/MPCONFIG options: +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email +#### Adding PKCE Support -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) -For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +### Solr improvements -### Improvements in the dataset versions API -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. +As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. +Please see the "Installing Solr" section of the Installation Prerequisites guide. + +### New release of Dataverse Previewers (including a Markdown previewer) + +Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. Please note: + +- SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. +- Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews + +### New or improved APIs + +The development of a [new UI for Dataverse](https://github.com/IQSS/dataverse-frontend) is driving the addition or improvement of many APIs. + +#### New API endpoints -### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). 
- /api/files/{id}/downloadCount - /api/files/{id}/dataTables @@ -71,7 +110,33 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. - downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. -### Extended the existing endpoints: +#### Pagination of files in dataset versions + +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. + + +#### DataFile API payload has been extended to include the following fields + +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +#### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering + +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + +#### Additional improvements to existing API endpoints + - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. - getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. Added support for filtering by optional criteria query parameter: - contentType @@ -93,25 +158,21 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. 
-### DataFile API payload has been extended to include the following fields: -- tabularData: Boolean field to know if the DataFile is of tabular type -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) -- friendlyType: String +### Improvements for developers -### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering -- Access status: through the `accessStatus` query parameter, which supports the following values: - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (with the Payara Platform Tools plugin). For details, see https://guides.dataverse.org/en/6.1/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. +- With this release, we add a new type of testing to Dataverse: integration tests which are not end-to-end tests (like our API tests). Starting with OIDC authentication support, we test regularly on CI for working condition of both OIDC login options in UI and API. +- The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. +- The support for setting JVM options during testing has been improved for developers. You now may add the `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +## Major use cases and infrastructure enhancements -### Misc -- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. -- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +Changes and fixes in this release not already mentioned above include: + +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. 
This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -125,68 +186,18 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt + - Source-Organization from `dataverse.bagit.sourceorg.name`. + - Organization-Address from `dataverse.bagit.sourceorg.address`. + - Organization-Email from `dataverse.bagit.sourceorg.email`. - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. - - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed.
- -### Solr Improvements -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. - - -### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. -- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. -- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. -- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. - -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. 
- -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options +## New configuration options - dataverse.auth.oidc.enabled - dataverse.auth.oidc.client-id @@ -199,8 +210,24 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age - dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +## Backward incompatibilities + +- Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- See "Globus support" above for backward incompatibilies specific to Globus. + +## Complete list of changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. + ## Installation If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! @@ -209,7 +236,7 @@ Once you are in production, we would be delighted to update our [map of Datavers You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). -## Upgrade Instructions +## Upgrade instructions Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. @@ -241,6 +268,8 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `$PAYARA/bin/asadmin deploy dataverse-6.1.war` +As noted above, deployment of the war file might take several minutes due a database migration script required for the new storage quotas feature. + 5\. 
Restart Payara - `service payara stop` @@ -255,7 +284,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -285,20 +314,3 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as - Restart Solr instance (usually `service solr restart` depending on solr/OS) 8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). - - -## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) -- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, - -## Complete List of Changes - -For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. - -## Performance Testing Results -The results of performance testing can be found here: -https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 - -## Getting Help - -For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 011b9291e6f694631d237bd047c3a170e6e93a2e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 13:58:08 -0500 Subject: [PATCH 388/546] remove globus snippet (already added) #10151 --- doc/release-notes/10162-globus-support.md | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md deleted file mode 100644 index 60670b5b101..00000000000 --- a/doc/release-notes/10162-globus-support.md +++ /dev/null @@ -1,19 +0,0 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, -and for referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. 
-Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. -- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model -- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) -- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) - -Backward Incompatibilities: -- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism -- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. - -New JVM Options: -- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). -- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. - - - -Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used From 3e32f42959dce41e9c21c9e2285fdf719b048dc0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 14:57:52 -0500 Subject: [PATCH 389/546] link to guides in more places, other tweaks #10151 --- doc/release-notes/6.1-release-notes.md | 43 +++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 475d4fc0887..fab11ce4959 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -13,12 +13,17 @@ Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. +For details, see [dataverse.files.guestbook-at-request](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-guestbook-at-request) and PR #9599. + ### Collection-level storage quotas This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. + Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. -### Globus support +For details, see [Storage Quotas for Collections](https://guides.dataverse.org/en/6.1/admin/collectionquotas.html) in the Admin Guide. + +### Globus support (experimental), continued Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Please note: @@ -32,54 +37,50 @@ Backward incompatibilities: - The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. New JVM options: -- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage](https://guides.dataverse.org/en/6.1/installation/config.html#file-storage) section of the Installation Guide. - dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used -Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) +Further details can be found in the [Big Data Support](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) section of the Developer Guide. ### Alternative Title now allows multiple values -Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below and PR #9440 for details. ### External tools: configure tools now available at the dataset level -Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See [External Tools](https://guides.dataverse.org/en/6.1/admin/external-tools.html#dataset-level-configure-tools) in the Admin Guide and PR #9925. ### S3 out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). 
This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### JSON Schema for datasets -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10109. -### OpenID Connect authentication provider improvements +### OpenID Connect (OIDC) improvements #### Using MicroProfile Config for provisioning -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. +With this release it is possible to provision a single OIDC-based authentication provider by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. 
-If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. +If you are using an external OIDC provider component as an identity management system and/or broker to other authentication providers such as Google, eduGain SAML and so on, this might make your life easier during instance setups and reconfiguration. You no longer need to generate the necessary JSON file. #### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) +Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +For both features, see the [OIDC](https://guides.dataverse.org/en/6.0/installation/oidc.html) section of the Installation Guide and PR #9273. ### Solr improvements As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -Please see the "Installing Solr" section of the Installation Prerequisites guide. +Please see the [Installing Solr](https://guides.dataverse.org/en/6.1/installation/prerequisites.html#installing-solr) section of the Installation Guide. ### New release of Dataverse Previewers (including a Markdown previewer) @@ -87,7 +88,7 @@ Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/da - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- There is now a [Markdown (.md)](https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews) previewer. ### New or improved APIs @@ -172,7 +173,7 @@ This parameter applies a filter criteria to the operation and supports the follo Changes and fixes in this release not already mentioned above include: -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. See PR #10142. 
- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -184,7 +185,7 @@ recreate datasets from archival Bags will start indicating which version(s) of t OAI_ORE format they can read. Dataverse installations that have been using archival Bags may wish to update any existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +[archival Bag export API](https://guides.dataverse.org/en/6.1/installation/config.html#bagit-export-api-calls) to generate updated versions. - For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - Source-Organization from `dataverse.bagit.sourceorg.name`. From 92a298da25c03822c848e5a43253f039193665f9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:42:55 -0500 Subject: [PATCH 390/546] add missing new config options and sort #10151 --- doc/release-notes/6.1-release-notes.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index fab11ce4959..1e09a207104 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -200,25 +200,28 @@ See also #10060. ## New configuration options -- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.auth-server-url - dataverse.auth.oidc.client-id - dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.enabled - dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.max-cache-age +- dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title - dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age -- dataverse.files.{driverId}.upload-out-of-band +- dataverse.auth.oidc.title +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.name - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +- dataverse.files.{driverId}.upload-out-of-band ## Backward incompatibilities - Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. -- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new `dataverse.bagit` JVM options mentioned above. 
For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - See "Globus support" above for backward incompatibilies specific to Globus. ## Complete list of changes From 80634c7a59d7bfce4ab0e871d80d34f446579123 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:54:30 -0500 Subject: [PATCH 391/546] address feedback from review #9919 --- doc/sphinx-guides/source/developers/performance.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/performance.rst b/doc/sphinx-guides/source/developers/performance.rst index aa50cd6e40c..46c152f322e 100644 --- a/doc/sphinx-guides/source/developers/performance.rst +++ b/doc/sphinx-guides/source/developers/performance.rst @@ -116,12 +116,12 @@ We'd like to rate limit commands (CreateDataset, etc.) so that we can keep them Solr ~~~~ -While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. We are investigating in `#9635 `_. +While in the past Solr performance hasn't been much of a concern, in recent years we've noticed performance problems when Harvard Dataverse is under load. Improvements were made in `PR #10050 `_, for example. Datasets with Large Numbers of Files or Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -We'd like to scale Dataverse to better handle large number of files or versions (`#9763 `_). +We'd like to scale Dataverse to better handle large number of files or versions. Progress was made in `PR #9883 `_. Withstanding Bots ~~~~~~~~~~~~~~~~~ @@ -183,7 +183,7 @@ Most likely there is training available that is oriented toward performance. The Learn from the Community How They Monitor Performance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings (, but we could dedicate time to this topic at our annual meeting or community calls. +Some members of the Dataverse community are likely users of newish tools like the ELK stack (Elasticsearch, Logstash, and Kibana), the TICK stack (Telegraph InfluxDB Chronograph and Kapacitor), GoAccess, Prometheus, Graphite, and more we haven't even heard of. In the :doc:`/admin/monitoring` section of the Admin Guide, we already encourage the community to share findings, but we could dedicate time to this topic at our annual meeting or community calls. Teach the Community to Do Performance Testing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 3d6343eca2846edca97e4d9699f3305fb7c19c62 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 16:09:46 -0500 Subject: [PATCH 392/546] mention configurable docroot #10151 --- doc/release-notes/6.1-release-notes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 1e09a207104..1279d09a023 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -195,6 +195,7 @@ to generate updated versions. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. 
This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. +- It's now possible to configure the docroot, which holds collection logos and more. See [dataverse.files.docroot](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-docroot) in the Installation Guide and PR #9819. - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. @@ -213,6 +214,7 @@ See also #10060. - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.name +- dataverse.files.docroot - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request - dataverse.files.{driverId}.upload-out-of-band From fa32ef5a413f6b0fbfab7d6e96e602a31bc18ac4 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:36:52 +0000 Subject: [PATCH 393/546] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 99438520120..1e86f24356b 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2686,7 +2686,7 @@ In particular, the user permissions that this API call checks, returned as boole curl -H "X-Dataverse-key: $API_TOKEN" -X GET "$SERVER_URL/api/datasets/$ID/userPermissions" -Know if a User can download at least one File from a Dataset Version +Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This API call allows to know if the calling user can download at least one file of a dataset version. From 476977b48925ae6eae4dabf69b0de0d7d40d6841 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:01 +0000 Subject: [PATCH 394/546] Update doc/sphinx-guides/source/api/native-api.rst Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 1e86f24356b..9ceeb4410ef 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -2689,7 +2689,7 @@ In particular, the user permissions that this API call checks, returned as boole Know If a User Can Download at Least One File from a Dataset Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This API call allows to know if the calling user can download at least one file of a dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. .. 
code-block:: bash From 64861afbc11c4475ca3d85e729f4b73e962d5efa Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 12 Dec 2023 11:37:36 +0000 Subject: [PATCH 395/546] Update doc/release-notes/10155-datasets-can-download-at-least-one-file.md Co-authored-by: Philip Durbin --- .../10155-datasets-can-download-at-least-one-file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md index 566d505f7ca..a0b0d02310a 100644 --- a/doc/release-notes/10155-datasets-can-download-at-least-one-file.md +++ b/doc/release-notes/10155-datasets-can-download-at-least-one-file.md @@ -1,3 +1,3 @@ The getCanDownloadAtLeastOneFile (/api/datasets/{id}/versions/{versionId}/canDownloadAtLeastOneFile) endpoint has been created. -This endpoint allows to know if the calling user can download at least one file of a particular dataset version. +This API endpoint indicates if the calling user can download at least one file from a dataset version. Note that Shibboleth group permissions are not considered. From 39e4bcee0f164854301b45f0ba6cbd4e11b4cf5c Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:42:46 +0000 Subject: [PATCH 396/546] Fixed: minio storage volume mapping --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5265a6b7c2d..6f8decc0dfb 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -209,7 +209,7 @@ services: networks: - dataverse volumes: - - minio_storage:/data + - ./docker-dev-volumes/minio_storage:/data environment: MINIO_ROOT_USER: 4cc355_k3y MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y From 0c279adc3e93bd09bedc08a3f1bda48876fc1de3 Mon Sep 17 00:00:00 2001 From: GPortas Date: Tue, 12 Dec 2023 13:50:08 +0000 Subject: [PATCH 397/546] Removed: sleep calls from testGetCanDownloadAtLeastOneFile IT --- .../java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b2cf5c75467..f36b93b85ab 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -4225,7 +4225,7 @@ public void testGetGlobusUploadParameters() { } @Test - public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { + public void testGetCanDownloadAtLeastOneFile() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse); @@ -4252,9 +4252,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Create a second user to call the getCanDownloadAtLeastOneFile method Response createSecondUserResponse = UtilIT.createRandomUser(); createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -4275,9 +4272,6 @@ public void testGetCanDownloadAtLeastOneFile() throws InterruptedException { publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", 
apiToken); publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); - // Make sure the dataset is published - Thread.sleep(3000); - // Call with a valid dataset id when a file is restricted and the user does not have access canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken); canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode()); From 960a20c79dc8a3292ff3d26973d8e35d8a4f481c Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 14:06:21 -0500 Subject: [PATCH 398/546] #10168 fix error response status --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b3bfc476423..05355cbbc68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4288,7 +4288,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext headers); if (dsv.getArchivalCopyLocation() == null) { - return error(Status.NO_CONTENT, "This dataset version has not been archived"); + return error(Status.NOT_FOUND, "This dataset version has not been archived"); } else { JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation()); return ok(status); From 40e5d39c73ec2097fb16d65e8fff33078168498b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 12 Dec 2023 14:53:45 -0500 Subject: [PATCH 399/546] how to test Docker images made during a release --- .../source/developers/making-releases.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 23c4773a06e..432b4ca2672 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -67,6 +67,19 @@ Once important tests have passed (compile, unit tests, etc.), merge the pull req If this is a hotfix release, skip this whole "merge develop to master" step (the "develop" branch is not involved until later). +(Optional) Test Docker Images +----------------------------- + +After the "master" branch has been updated and the GitHub Action to build and push Docker images has run (see `PR #9776 `_), go to https://hub.docker.com/u/gdcc and make sure the "alpha" tag for the following images has been updated: + +- https://hub.docker.com/r/gdcc/base +- https://hub.docker.com/r/gdcc/dataverse +- https://hub.docker.com/r/gdcc/configbaker + +To test these images against our API test suite, go to the "alpha" workflow at https://github.com/gdcc/api-test-runner/actions/workflows/alpha.yml and run it. + +If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. 
+ Build the Guides for the Release -------------------------------- From a240bd0fa81cc4a9db0cc9c8ddb37ad733324fcd Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 15:20:07 -0500 Subject: [PATCH 400/546] bump htmlunit to 3.9.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 34b0ad2e835..d690e509f46 100644 --- a/pom.xml +++ b/pom.xml @@ -650,7 +650,7 @@ org.htmlunit htmlunit - 3.2.0 + 3.9.0 test From b1f15bb95ff58dd62c7aaa1a2ababa1f44b83881 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 15:30:54 -0500 Subject: [PATCH 401/546] bump DuraCloud to 8.0.0 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 34b0ad2e835..be4fa605aab 100644 --- a/pom.xml +++ b/pom.xml @@ -466,7 +466,7 @@ org.duracloud common - 7.1.1 + 8.0.0 org.slf4j @@ -481,7 +481,7 @@ org.duracloud storeclient - 7.1.1 + 8.0.0 org.slf4j From daf89261174600b1db106974cc941213fa0b36bd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 12 Dec 2023 15:37:27 -0500 Subject: [PATCH 402/546] #10168 update integration tests --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 928574eb82b..7efd44b9533 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3291,7 +3291,8 @@ public void testArchivalStatusAPI() throws IOException { //Verify the status is empty Response nullStatus = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus.prettyPrint(); + nullStatus.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); //Set it Response setStatus = UtilIT.setDatasetVersionArchivalStatus(datasetId, "1.0", apiToken, "pending", @@ -3309,7 +3310,7 @@ public void testArchivalStatusAPI() throws IOException { //Make sure it's gone Response nullStatus2 = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken); - nullStatus2.then().assertThat().statusCode(NO_CONTENT.getStatusCode()); + nullStatus2.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); } From 2ce0fb8f083ef8dfedfb71feea0d58ff2f9c7647 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 12 Dec 2023 16:06:52 -0500 Subject: [PATCH 403/546] bump google.cloud.version to 0.209.0 --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 7b305cad581..25d714b39ed 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -152,7 +152,7 @@ 42.6.0 9.3.0 1.12.290 - 0.177.0 + 0.209.0 8.0.0 From 349f7dbcaaaf260c00126567f9f4c6d32b0c367c Mon Sep 17 00:00:00 2001 From: sbondka Date: Wed, 13 Dec 2023 15:31:31 +0100 Subject: [PATCH 404/546] Add presentation link --- doc/sphinx-guides/source/admin/integrations.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index ed3860a9ca1..53a663b942e 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -193,7 +193,7 @@ This connector facilitates seamless two-way transfer of datasets and 
files, emph It is a lightweight client-side web application built using React and relying on the Dataverse External Tool feature, allowing for easy deployment on modern integration systems. Currently, it supports small to medium-sized files, with plans to enable support for large files and signed Dataverse endpoints in the future. What kind of user is the feature intended for? -The feature is intended for researchers, scientists and data analyst who are working with Dataverse instances and JupyterHub looking to ease the data transfer process. +The feature is intended for researchers, scientists and data analyst who are working with Dataverse instances and JupyterHub looking to ease the data transfer process. See `presentation `_ for details. .. _integrations-discovery: From ea644b89a3149ff8599fe3fcaa3a2bf6f5804e71 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 13 Dec 2023 14:16:47 -0500 Subject: [PATCH 405/546] add "message sent" success message #2638 --- src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java | 2 ++ src/main/java/propertyFiles/Bundle.properties | 1 + src/main/webapp/contactFormFragment.xhtml | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java index 6be768321c4..68912969003 100644 --- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java +++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.Optional; @@ -217,6 +218,7 @@ public String sendMessage() { } logger.fine("sending feedback: " + feedback); mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody()); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent")); return null; } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 10576c0c116..0c6ce979a94 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -184,6 +184,7 @@ contact.context.file.intro={0}\n\nYou have just been sent the following message contact.context.file.ending=\n\n---\n\n{0}\n{1}\n\nGo to file {2}/file.xhtml?fileId={3}\n\nYou received this email because you have been listed as a contact for the dataset. If you believe this was an error, please contact {4} at {5}. To respond directly to the individual who sent the message, simply reply to this email. contact.context.support.intro={0},\n\nThe following message was sent from {1}.\n\n---\n\n contact.context.support.ending=\n\n---\n\nMessage sent from Support contact form. +contact.sent=Message sent. # dataverseuser.xhtml account.info=Account Information diff --git a/src/main/webapp/contactFormFragment.xhtml b/src/main/webapp/contactFormFragment.xhtml index cb4eb3d0872..8950ec5acf8 100644 --- a/src/main/webapp/contactFormFragment.xhtml +++ b/src/main/webapp/contactFormFragment.xhtml @@ -81,7 +81,7 @@
    + update="@form,messagePanel" oncomplete="if (args && !args.validationFailed) PF('contactForm').hide();" actionListener="#{sendFeedbackDialog.sendMessage}">
    From 057d1b926513a4716737a4b766a8fb46e709d44e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 09:05:21 -0500 Subject: [PATCH 436/546] add docker compose config to get HarvestingServerIT to pass #9275 --- docker-compose-dev.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 6f8decc0dfb..ce9f39a418a 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -19,6 +19,9 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" + # to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: "2" + dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem From 37d3d41a51867758cac611215f830ad2af1d31a1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 09:11:41 -0500 Subject: [PATCH 437/546] assert 500 error when invalid query params are passed #9275 --- .../harvard/iq/dataverse/api/HarvestingServerIT.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index e02964ef28f..07788eca6db 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -860,7 +860,16 @@ public void testMultiRecordOaiSet() throws InterruptedException { logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); assertEquals(200, deleteResponse.getStatusCode(), "Failed to delete the control multi-record set"); } - + + @Test + public void testInvalidQueryParams() { + // "foo" is not a valid verb + String oaiVerbPath = "/oai?foo=bar"; + Response identifyResponse = given().get(oaiVerbPath); + // TODO Why is this 500? https://github.com/IQSS/dataverse/issues/9275 + identifyResponse.then().assertThat().statusCode(500); + } + // TODO: // What else can we test? 
// Some ideas: From 2ab5ba99a357fa88f44fe72201f827cb26cff448 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 4 Jan 2024 10:50:15 -0500 Subject: [PATCH 438/546] #9686 update migration script --- ...gclient-id.sql => V6.1.0.1__9686-move-harvestingclient-id.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V6.0.0.5__9686-move-harvestingclient-id.sql => V6.1.0.1__9686-move-harvestingclient-id.sql} (100%) diff --git a/src/main/resources/db/migration/V6.0.0.5__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql similarity index 100% rename from src/main/resources/db/migration/V6.0.0.5__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql From 27fa15458cf9d68192a3e0eed53f43371990de8e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 4 Jan 2024 16:21:16 -0500 Subject: [PATCH 439/546] show errors (in XML) for verb params #9275 --- .../9275-harvest-invalid-query-params.md | 4 +++ .../server/web/servlet/OAIServlet.java | 18 ++++++++-- .../iq/dataverse/api/HarvestingServerIT.java | 34 ++++++++++++++++--- 3 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 doc/release-notes/9275-harvest-invalid-query-params.md diff --git a/doc/release-notes/9275-harvest-invalid-query-params.md b/doc/release-notes/9275-harvest-invalid-query-params.md new file mode 100644 index 00000000000..33d7c7bac13 --- /dev/null +++ b/doc/release-notes/9275-harvest-invalid-query-params.md @@ -0,0 +1,4 @@ +OAI-PMH error handling has been improved to display a machine-readable error in XML rather than a 500 error with no further information. + +- /oai?foo=bar will show "No argument 'verb' found" +- /oai?verb=foo&verb=bar will show "Verb must be singular, given: '[foo, bar]'" diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 96a19acc0e8..34152a2d8bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,8 +31,11 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import io.gdcc.xoai.exceptions.BadArgumentException; +import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; +import io.gdcc.xoai.model.oaipmh.verbs.Verb; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; @@ -256,9 +259,18 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe "Sorry. OAI Service is disabled on this Dataverse node."); return; } - - RawRequest rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); - + + RawRequest rawRequest = null; + try { + rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); + } catch (BadVerbException bve) { + // Verb.Type is required. Hard-code one. + rawRequest = new RawRequest(Verb.Type.Identify); + // Ideally, withError would accept a BadVerbException. 
+ BadArgumentException bae = new BadArgumentException(bve.getLocalizedMessage()); + rawRequest.withError(bae); + } + OAIPMH handle = dataProvider.handle(rawRequest); response.setContentType("text/xml;charset=UTF-8"); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 07788eca6db..3936a240826 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -863,11 +863,35 @@ public void testMultiRecordOaiSet() throws InterruptedException { @Test public void testInvalidQueryParams() { - // "foo" is not a valid verb - String oaiVerbPath = "/oai?foo=bar"; - Response identifyResponse = given().get(oaiVerbPath); - // TODO Why is this 500? https://github.com/IQSS/dataverse/issues/9275 - identifyResponse.then().assertThat().statusCode(500); + + // The query parameter "verb" must appear. + Response noVerbArg = given().get("/oai?foo=bar"); + noVerbArg.prettyPrint(); + noVerbArg.then().assertThat() + .statusCode(OK.getStatusCode()) + // This should be "badVerb" + .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error", equalTo("No argument 'verb' found")); + + // The query parameter "verb" cannot appear more than once. + Response repeated = given().get( "/oai?verb=foo&verb=bar"); + repeated.prettyPrint(); + repeated.then().assertThat() + .statusCode(OK.getStatusCode()) + // This should be "badVerb" + .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error", equalTo("Verb must be singular, given: '[foo, bar]'")); + + } + + @Test + public void testNoSuchSetError() { + Response noSuchSet = given().get("/oai?verb=ListIdentifiers&set=census&metadataPrefix=dc"); + noSuchSet.prettyPrint(); + noSuchSet.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("oai.error.@code", equalTo("noRecordsMatch")) + .body("oai.error", equalTo("Requested set 'census' does not exist")); } // TODO: From 6db3e3b9c64a0163c52b3cf988669d9bfd3a919f Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 4 Jan 2024 16:42:16 -0500 Subject: [PATCH 440/546] Fix for "latest" dataset version --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- .../impl/GetLatestAccessibleDatasetVersionCommand.java | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 094f2b88c92..83b1a4e861b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2796,7 +2796,7 @@ private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String @Override public Command handleLatest() { - return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned); + return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java index 96e8ee73a50..7bcc851bde2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java @@ -25,15 
+25,17 @@ public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; + private boolean checkPerms; public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { - this(aRequest, anAffectedDataset, false); + this(aRequest, anAffectedDataset, false, false); } - public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) { + public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; + this.checkPerms = checkPerms; } @Override @@ -41,6 +43,6 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { if (ds.getLatestVersion().isDraft() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) { return ctxt.engine().submit(new GetDraftDatasetVersionCommand(getRequest(), ds)); } - return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, true)); + return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, checkPerms)); } } From d017bf6843189a0228ff1be229614ba7685fcf0b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 5 Jan 2024 11:48:00 -0500 Subject: [PATCH 441/546] #9686 assign harvest client id to harvested files --- .../harvard/iq/dataverse/api/imports/ImportServiceBean.java | 5 +++++ .../harvest/client/HarvestingClientServiceBean.java | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index c17ba909230..c5812403f31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -332,6 +332,11 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString()); + //adding the harvesting client id to harvested files #9686 + for (DataFile df : ds.getFiles()){ + df.setHarvestedFrom(harvestingClient); + } + if (existingDs != null) { // If this dataset already exists IN ANOTHER DATAVERSE // we are just going to skip it! 
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 7ec6d75a41c..5747c64d217 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -199,8 +199,8 @@ public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCoun public Long getNumberOfHarvestedDatasetsByAllClients() { try { - return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " - + " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult(); + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dvobject d " + + " WHERE d.harvestingclient_id IS NOT NULL and d.dtype = 'Dataset'").getSingleResult(); } catch (Exception ex) { logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage()); From e085ca926274a4688faeb61f842c319ffc41b538 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 5 Jan 2024 15:27:06 -0500 Subject: [PATCH 442/546] Adds test to cover latest, latest published and specific scenarios. --- .../harvard/iq/dataverse/api/DatasetsIT.java | 302 +++++++++++++++--- 1 file changed, 249 insertions(+), 53 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 200cfbaf1ff..9ac05ce5704 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -70,6 +70,7 @@ import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.CoreMatchers.startsWith; import static org.hamcrest.CoreMatchers.nullValue; import static org.hamcrest.Matchers.contains; @@ -613,6 +614,7 @@ public void testCreatePublishDestroyDataset() { */ @Test public void testDatasetVersionsAPI() { + // Create user String apiToken = UtilIT.createRandomUserGetToken(); @@ -650,6 +652,11 @@ public void testDatasetVersionsAPI() { .statusCode(OK.getStatusCode()) .body("data.files", equalTo(null)); + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + excludeFiles = false; unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, excludeFiles, false); unpublishedDraft.prettyPrint(); @@ -657,7 +664,11 @@ public void testDatasetVersionsAPI() { .statusCode(OK.getStatusCode()) .body("data.files.size()", equalTo(1)); - + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + // Publish collection and dataset UtilIT.publishDataverseViaNativeApi(collectionAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); @@ -680,7 +691,8 @@ public void testDatasetVersionsAPI() { .body("data.size()", equalTo(2)) .body("data[0].files.size()", equalTo(2)) .body("data[1].files.size()", equalTo(1)); - + + // Now call this api with the new (as of 6.1) pagination parameters Integer offset = 0; Integer 
howmany = 1; @@ -690,15 +702,16 @@ public void testDatasetVersionsAPI() { versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.size()", equalTo(1)) + .body("data.versionState[0]", equalTo("DRAFT")) .body("data[0].files.size()", equalTo(2)); // And now call it with an un-privileged token, to make sure only one - // (the published) version is shown: - + // (the published) version is shown: versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms); versionsResponse.prettyPrint(); versionsResponse.then().assertThat() .statusCode(OK.getStatusCode()) + .body("data.versionState[0]", not("DRAFT")) .body("data.size()", equalTo(1)); // And now call the "short", no-files version of the same api @@ -711,35 +724,98 @@ public void testDatasetVersionsAPI() { - //Set of tests on non-deaccesioned dataset - - boolean includeDeaccessioned = true; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - - excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); + //Set of tests on non-deaccesioned dataset + String specificVersion = "1.0"; + boolean includeDeaccessioned = false; + Response datasetVersion = null; - includeDeaccessioned = false; excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); + //Latest published authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Latest published unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Latest authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); + + //Specific version unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files", equalTo(null)); excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1)); - + //Latest published authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Latest published unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Latest authorized token, user is authenticated should get the Draft version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token, user has no permissions should get the latest Published version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + + //Specific version unauthorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("RELEASED")) + .body("data.files.size()", equalTo(1)); + //We deaccession the dataset Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -747,38 +823,158 @@ public void testDatasetVersionsAPI() { //Set of tests on deaccesioned dataset, only 3/9 should return OK message includeDeaccessioned = true; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(OK.getStatusCode()).body("data[0].files", equalTo(null)); excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). 
- then().assertThat().statusCode(OK.getStatusCode()).body("data.files.size()", equalTo(1));; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - - includeDeaccessioned = false; - excludeFiles = true; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - excludeFiles = false; - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned). - then().assertThat().statusCode(404); - + //Latest published authorized token with deaccessioned dataset + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files.size()", equalTo(1)); + + //Latest published requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files.size()", equalTo(1)); + + //Specific version unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets. 
+ datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + excludeFiles = true; + //Latest published exclude files authorized token with deaccessioned dataset + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Latest published exclude files, should get the DEACCESSIONED version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Latest authorized token should get the DRAFT version with no files + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest unauthorized token excluding files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Specific version authorized token + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Specific version unauthorized token requesting files, one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets. 
+ datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DEACCESSIONED")) + .body("data.files", equalTo(null)); + + //Set of test when we have a deaccessioned dataset but we don't include deaccessioned + includeDeaccessioned = false; + excludeFiles = false; + //Latest published authorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest published unauthorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files.size()", equalTo(2)); + + //Latest unauthorized token one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version unauthorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); - - + excludeFiles = true; - + //Latest published authorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest published unauthorized token with deaccessioned dataset not included + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Latest authorized token should get the DRAFT version + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(OK.getStatusCode()) + .body("data.versionState", equalTo("DRAFT")) + .body("data.files", equalTo(null)); + + //Latest 
unauthorized token one version is DEACCESSIONED the second is DRAFT so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version authorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + //Specific version unauthorized token, the version is DEACCESSIONED so shouldn't get any datasets + datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned); + datasetVersion.prettyPrint(); + datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + } From 4db74b6e5ddd3cf7f2ee49b94b9b229e2746bd35 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 5 Jan 2024 16:20:27 -0500 Subject: [PATCH 443/546] how to write release note snippets #9264 --- .../source/developers/making-releases.rst | 10 ++-- .../source/developers/version-control.rst | 54 ++++++++++++++++--- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index e73811a77e1..6b94282d55e 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -14,16 +14,18 @@ See :doc:`version-control` for background on our branching strategy. The steps below describe making both regular releases and hotfix releases. +.. _write-release-notes: + Write Release Notes ------------------- -Developers express the need for an addition to release notes by creating a file in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. +Developers express the need for an addition to release notes by creating a "release note snippet" in ``/doc/release-notes`` containing the name of the issue they're working on. The name of the branch could be used for the filename with ".md" appended (release notes are written in Markdown) such as ``5053-apis-custom-homepage.md``. See :ref:`writing-release-note-snippets` for how this is described for contributors. -The task at or near release time is to collect these notes into a single doc. +The task at or near release time is to collect these snippets into a single file. - Create an issue in GitHub to track the work of creating release notes for the upcoming release. -- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the issue-specific release notes mentioned above. -- Delete the previously-created, issue-specific release notes as the content is added to the main release notes file. +- Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. +- Delete the release note snippets as the content is added to the main release notes file. 
- Include instructions to describe the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. - Take the release notes .md through the regular Code Review and QA process. diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 91f59c76e61..12f3d5b81fd 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -65,23 +65,65 @@ The example of creating a pull request below has to do with fixing an important Find or Create a GitHub Issue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Issue is a bug (unexpected behavior) or a new feature in Dataverse, to know how to find or create an issue in dataverse please see https://github.com/IQSS/dataverse/blob/develop/CONTRIBUTING.md +An issue represents a bug (unexpected behavior) or a new feature in Dataverse. We'll use the issue number in the branch we create for our pull request. -For guidance on which issue to work on, please ask! with email to support@dataverse.org +Finding GitHub Issues to Work On +******************************** -Let's say you want to tackle https://github.com/IQSS/dataverse/issues/3728 which points out a typo in a page of the Dataverse Software's documentation. +Assuming this is your first contribution to Dataverse, you should start with something small. The following issue labels might be helpful in your search: + +- `good first issue `_ (these appear at https://github.com/IQSS/dataverse/contribute ) +- `hacktoberfest `_ +- `Help Wanted: Code `_ +- `Help Wanted: Documentation `_ + +For guidance on which issue to work on, please ask! :ref:`getting-help-developers` explains how to get in touch. + +Creating GitHub Issues to Work On +********************************* + +You are very welcome to create a GitHub issue to work on. However, for significant changes, please reach out (see :ref:`getting-help-developers`) to make sure the team and community agree with the proposed change. + +For small changes and especially typo fixes, please don't worry about reaching out first. + +Communicate Which Issue You Are Working On +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the issue you can simply leave a comment to say you're working on it. If you tell us your GitHub username we are happy to add you to the "read only" team at https://github.com/orgs/IQSS/teams/dataverse-readonly/members so that we can assign the issue to you while you're working on it. You can also tell us if you'd like to be added to the `Dataverse Community Contributors spreadsheet `_. -Create a New Branch off the develop Branch +Create a New Branch Off the develop Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Always create your feature branch from the latest code in develop, pulling the latest code if necessary. As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing, and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. +Always create your feature branch from the latest code in develop, pulling the latest code if necessary. 
As mentioned above, your branch should have a name like "3728-doc-apipolicy-fix" that starts with the issue number you are addressing (e.g. `#3728 `_) and ends with a short, descriptive name. Dashes ("-") and underscores ("_") in your branch name are ok, but please try to avoid other special characters such as ampersands ("&") that have special meaning in Unix shells. Commit Your Change to Your New Branch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Making a commit (or several commits) to that branch, enter a description of the changes you have made. Ideally the first line of your commit message includes the number of the issue you are addressing, such as ``Fixed BlockedApiPolicy #3728``. +For each commit to that branch, try to include the issue number along with a summary in the first line of the commit message, such as ``Fixed BlockedApiPolicy #3728``. You are welcome to write longer descriptions in the body as well! + +.. _writing-release-note-snippets: + +Writing a Release Note Snippet +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We highly value your insight as a contributor when it comes to describing your work in our release notes. Not every pull request will be mentioned in release notes but most are. + +As described at :ref:`write-release-notes`, at release time we compile together release note "snippets" into the final release notes. + +Here's how to add a release note snippet to your pull request: + +- Create a Markdown file under ``doc/release-notes``. You can reuse the name of your branch and append ".md" to it, e.g. ``3728-doc-apipolicy-fix.md`` +- Edit the snippet to include anything you think should be mentioned in the release notes, such as: + + - Descriptions of new features + - Explanations of bugs fixed + - New configuration settings + - Upgrade instructions + - Etc. + +Release note snippets do not need to be long. For a new feature, a single line description might be enough. Please note that your release note will likely be edited (expanded or shortened) when the final release notes are being created.
Push Your Branch to GitHub ~~~~~~~~~~~~~~~~~~~~~~~~~~ From 826d4bdcd2d0418c8d65c8409107de0d66f6dd19 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 5 Jan 2024 17:46:26 -0500 Subject: [PATCH 444/546] per QA --- doc/sphinx-guides/source/developers/globus-api.rst | 1 + .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index de9df06a798..2f922fb1fc0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -2,6 +2,7 @@ Globus Transfer API =================== The Globus API addresses three use cases: + * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) * Reference of files that will remain in a remote Globus endpoint * Transfer from a Dataverse-managed Globus endpoint diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 61884045f35..3e60441850b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -240,7 +240,7 @@ private int makeDir(GlobusEndpoint endpoint, String dir) { MakeRequestResponse result = null; String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}"; try { - logger.info(body); + logger.fine(body); URL url = new URL( "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir"); result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body); From dbab6ca9269a93bd7d292b37b00c42dc0fbad55f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Jan 2024 10:30:25 -0500 Subject: [PATCH 445/546] use name@email.xyz to match citation block #2638 From datasetfieldtype.datasetContactEmail.watermark --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index b1c38e52496..ece3f070cdd 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -155,7 +155,7 @@ contact.support=Support contact.from=From contact.from.required=User email is required. contact.from.invalid=Email is invalid. -contact.from.emailPlaceholder=valid@email.org +contact.from.emailPlaceholder=name@email.xyz contact.subject=Subject contact.subject.required=Subject is required. contact.subject.selectTab.top=Select subject... From 88af3d4ed1316df681ce53fc0d4c00d03ac56e7d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 8 Jan 2024 12:16:51 -0500 Subject: [PATCH 446/546] clean up error handling #9275 dataProvider.handle(params) allows us to return the correct error. 
--- .../harvest/server/web/servlet/OAIServlet.java | 16 ++++++---------- .../iq/dataverse/api/HarvestingServerIT.java | 6 ++---- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 34152a2d8bd..233ca94f5fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -31,11 +31,9 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; -import io.gdcc.xoai.exceptions.BadArgumentException; import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; -import io.gdcc.xoai.model.oaipmh.verbs.Verb; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; import org.apache.commons.lang3.StringUtils; @@ -51,6 +49,7 @@ import jakarta.servlet.http.HttpServlet; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; +import java.util.Map; import javax.xml.stream.XMLStreamException; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -260,18 +259,15 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe return; } - RawRequest rawRequest = null; + Map params = httpServletRequest.getParameterMap(); + OAIPMH handle; try { - rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); + RawRequest rawRequest = RequestBuilder.buildRawRequest(params); + handle = dataProvider.handle(rawRequest); } catch (BadVerbException bve) { - // Verb.Type is required. Hard-code one. - rawRequest = new RawRequest(Verb.Type.Identify); - // Ideally, withError would accept a BadVerbException. - BadArgumentException bae = new BadArgumentException(bve.getLocalizedMessage()); - rawRequest.withError(bae); + handle = dataProvider.handle(params); } - OAIPMH handle = dataProvider.handle(rawRequest); response.setContentType("text/xml;charset=UTF-8"); try (XmlWriter xmlWriter = new XmlWriter(response.getOutputStream(), repositoryConfiguration);) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 3936a240826..45dd0c08226 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -869,8 +869,7 @@ public void testInvalidQueryParams() { noVerbArg.prettyPrint(); noVerbArg.then().assertThat() .statusCode(OK.getStatusCode()) - // This should be "badVerb" - .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error.@code", equalTo("badVerb")) .body("oai.error", equalTo("No argument 'verb' found")); // The query parameter "verb" cannot appear more than once. 
@@ -878,8 +877,7 @@ public void testInvalidQueryParams() { repeated.prettyPrint(); repeated.then().assertThat() .statusCode(OK.getStatusCode()) - // This should be "badVerb" - .body("oai.error.@code", equalTo("badArgument")) + .body("oai.error.@code", equalTo("badVerb")) .body("oai.error", equalTo("Verb must be singular, given: '[foo, bar]'")); } From 2b1e5dd4bda6788f644c2737cf56310e7eaefb7d Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:10:58 -0500 Subject: [PATCH 447/546] Extend getVersionFiles API endpoint to include the total file count --- .../iq/dataverse/api/AbstractApiBean.java | 64 +++----- .../harvard/iq/dataverse/api/Datasets.java | 146 +++++------------- .../harvard/iq/dataverse/api/DatasetsIT.java | 98 ++++++------ 3 files changed, 108 insertions(+), 200 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 58565bcc9d6..2a2843c0494 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -1,29 +1,6 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingDataverse; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -40,8 +17,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; +import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -51,33 +28,30 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; -import 
java.io.InputStream; -import java.net.URI; -import java.util.Arrays; -import java.util.Collections; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonException; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; import jakarta.persistence.PersistenceContext; import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response.ResponseBuilder; import jakarta.ws.rs.core.Response.Status; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -661,7 +635,13 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } - + protected Response ok( JsonArrayBuilder bld , long totalCount) { + return Response.ok(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("total_count", totalCount) + .add("data", bld).build()) + .type(MediaType.APPLICATION_JSON).build(); + } protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 094f2b88c92..56b9e8df319 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.PartETag; import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +15,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -23,92 +26,47 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; -import 
edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import 
edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; -import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; +import edu.harvard.iq.dataverse.makedatacount.*; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.ArchiverUtil; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.*; import edu.harvard.iq.dataverse.util.bagit.OREMap; -import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.util.SignpostingResources; -import edu.harvard.iq.dataverse.search.IndexServiceBean; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.util.json.*; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext; -import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; -import edu.harvard.iq.dataverse.globus.GlobusUtil; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.inject.Inject; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.ws.rs.*; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Response.Status; +import 
org.apache.commons.lang3.StringUtils; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; + import java.io.IOException; import java.io.InputStream; import java.net.URI; @@ -117,45 +75,21 @@ import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.LocalDateTime; -import java.util.*; -import java.util.concurrent.*; -import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.*; import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJB; -import jakarta.ejb.EJBException; -import jakarta.inject.Inject; -import jakarta.json.*; -import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.DefaultValue; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.NotAcceptableException; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; -import jakarta.ws.rs.core.Response.Status; + +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.settings.JvmSettings; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -546,7 +480,9 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria))); + // TODO: should we count the total every time or only when offset = 0? 
+ return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), + datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 200cfbaf1ff..ace69a6c606 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1,77 +1,66 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean; import edu.harvard.iq.dataverse.FileSearchCriteria; -import io.restassured.RestAssured; -import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static io.restassured.RestAssured.given; -import io.restassured.path.json.JsonPath; -import io.restassured.http.ContentType; -import io.restassured.response.Response; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.logging.Logger; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.skyscreamer.jsonassert.JSONAssert; -import org.junit.jupiter.api.Disabled; -import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; -import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import static jakarta.ws.rs.core.Response.Status.METHOD_NOT_ALLOWED; -import static jakarta.ws.rs.core.Response.Status.CONFLICT; -import static jakarta.ws.rs.core.Response.Status.NO_CONTENT; -import edu.harvard.iq.dataverse.DataFile; -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; import edu.harvard.iq.dataverse.authorization.DataverseRole; +import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; -import io.restassured.parsing.Parser; -import static io.restassured.path.json.JsonPath.with; -import io.restassured.path.xml.XmlPath; -import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; -import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.datavariable.VariableMetadata; import edu.harvard.iq.dataverse.datavariable.VariableMetadataDDIParser; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; 
-import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.Files; +import io.restassured.RestAssured; +import io.restassured.http.ContentType; +import io.restassured.parsing.Parser; +import io.restassured.path.json.JsonPath; +import io.restassured.path.xml.XmlPath; +import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.hamcrest.CoreMatchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.with; +import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; -import org.hamcrest.CoreMatchers; -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.hasItems; -import static org.hamcrest.CoreMatchers.startsWith; -import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.Matchers.contains; import static org.junit.jupiter.api.Assertions.*; @@ -3548,7 +3537,9 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) - .body("data[1].label", equalTo(testFileName2)); + .body("data[1].label", equalTo(testFileName2)) + .body("total_count", equalTo(5)); + String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3562,7 +3553,8 @@ public void getVersionFiles() throws IOException, InterruptedException { getVersionFilesResponsePaginated.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) - .body("data[1].label", equalTo(testFileName4)); + .body("data[1].label", equalTo(testFileName4)) + .body("total_count", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 0807b1fd64b076ef92029a16b1c3a946802c56b7 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:18:55 
-0500 Subject: [PATCH 448/546] fix format --- src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 2a2843c0494..419132f7ba7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -635,6 +635,7 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) @@ -642,6 +643,7 @@ protected Response ok( JsonArrayBuilder bld , long totalCount) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } + protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) From 53e525d7ddddcc4fd055f45debc126f8b2340ffc Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 8 Jan 2024 16:24:21 -0500 Subject: [PATCH 449/546] fix format --- src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index ace69a6c606..91aa33f6b1f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3539,7 +3539,6 @@ public void getVersionFiles() throws IOException, InterruptedException { .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) .body("total_count", equalTo(5)); - String x = getVersionFilesResponsePaginated.prettyPrint(); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 622a676681a336fd78e89d1f6d21e3e703eb7d7a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Tue, 9 Jan 2024 10:32:12 -0500 Subject: [PATCH 450/546] updated per review comments --- ...-extend-getVersionFiles-api-to-include-total-file-count.md | 2 ++ doc/sphinx-guides/source/api/native-api.rst | 4 +++- .../java/edu/harvard/iq/dataverse/api/AbstractApiBean.java | 2 +- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 1 - src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 4 ++-- 5 files changed, 8 insertions(+), 5 deletions(-) create mode 100644 doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md diff --git a/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md new file mode 100644 index 00000000000..80a71e9bb7e --- /dev/null +++ b/doc/release-notes/10202-extend-getVersionFiles-api-to-include-total-file-count.md @@ -0,0 +1,2 @@ +The response for getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) endpoint has been modified to include a total count of records available (totalCount:x). +This will aid in pagination by allowing the caller to know how many pages can be iterated through. The existing API (getVersionFileCounts) to return the count will still be available. 
\ No newline at end of file diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 6591c983824..48fc16bf141 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1066,7 +1066,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" -This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters: +This endpoint supports optional pagination, through the ``limit`` and ``offset`` query parameters. +To aid in pagination the Json response also includes the total number of rows (totalCount) available. +Usage example: .. code-block:: bash diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 419132f7ba7..bc94d7f0bcc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -639,7 +639,7 @@ protected Response ok( JsonArrayBuilder bld ) { protected Response ok( JsonArrayBuilder bld , long totalCount) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) - .add("total_count", totalCount) + .add("totalCount", totalCount) .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 56b9e8df319..3a2497d9418 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -480,7 +480,6 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - // TODO: should we count the total every time or only when offset = 0? 
return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 91aa33f6b1f..5753550d564 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3538,7 +3538,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName1)) .body("data[1].label", equalTo(testFileName2)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); @@ -3553,7 +3553,7 @@ public void getVersionFiles() throws IOException, InterruptedException { .statusCode(OK.getStatusCode()) .body("data[0].label", equalTo(testFileName3)) .body("data[1].label", equalTo(testFileName4)) - .body("total_count", equalTo(5)); + .body("totalCount", equalTo(5)); fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size(); assertEquals(testPageSize, fileMetadatasCount); From 291811e3e3c6f0f8c54dcd6b980444259e247d70 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 9 Jan 2024 11:42:34 -0500 Subject: [PATCH 451/546] #9686 add migration to harvested files --- .../migration/V6.1.0.1__9686-move-harvestingclient-id.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql index 22142b8fc41..67ba026745f 100644 --- a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql +++ b/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql @@ -1,8 +1,14 @@ ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; +--add harvesting client id to dvobject records of harvested datasets update dvobject dvo set harvestingclient_id = s.harvestingclient_id from (select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s where s.id = dvo.id; +--add harvesting client id to dvobject records of harvested files +update dvobject dvo set harvestingclient_id = s.harvestingclient_id from +(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s +where s.id = dvo.owner_id; + ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; From dfb1795e1318d058c4b614894ce9cd1039da38d3 Mon Sep 17 00:00:00 2001 From: Guillermo Portas Date: Tue, 9 Jan 2024 17:37:06 +0000 Subject: [PATCH 452/546] Added: minor docs formatting tweaks --- doc/sphinx-guides/source/api/native-api.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 48fc16bf141..09fc3c69693 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1067,7 +1067,9 @@ The fully expanded example above (without environment variables) looks like this curl "https://demo.dataverse.org/api/datasets/24/versions/1.0/files" This endpoint supports optional pagination, through the ``limit`` and 
``offset`` query parameters. -To aid in pagination the Json response also includes the total number of rows (totalCount) available. + +To aid in pagination the JSON response also includes the total number of rows (totalCount) available. + Usage example: .. code-block:: bash From 03f4a06b5ed163d9252e6e868fa2e939fda0a2e0 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 9 Jan 2024 13:30:34 -0500 Subject: [PATCH 453/546] #9686 add a release note --- doc/release-notes/9686-move-harvesting-client-id.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9686-move-harvesting-client-id.md diff --git a/doc/release-notes/9686-move-harvesting-client-id.md b/doc/release-notes/9686-move-harvesting-client-id.md new file mode 100644 index 00000000000..110fcc6ca6e --- /dev/null +++ b/doc/release-notes/9686-move-harvesting-client-id.md @@ -0,0 +1 @@ +With this release the harvesting client id will be available for harvested files. A database update will copy the id to previously harvested files./ From b9bcf995b42889af3333368b3264f49264df52ef Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Tue, 9 Jan 2024 14:58:32 -0500 Subject: [PATCH 454/546] Update Kanban Board URL The URL was pointing to the old board. --- doc/sphinx-guides/source/developers/intro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/intro.rst b/doc/sphinx-guides/source/developers/intro.rst index a01a8066897..f446b73de09 100755 --- a/doc/sphinx-guides/source/developers/intro.rst +++ b/doc/sphinx-guides/source/developers/intro.rst @@ -40,7 +40,7 @@ For the Dataverse Software development roadmap, please see https://www.iq.harvar Kanban Board ------------ -You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/2 +You can get a sense of what's currently in flight (in dev, in QA, etc.) by looking at https://github.com/orgs/IQSS/projects/34 Issue Tracker ------------- From 94570f0c670e6d39594c5cfb9ca5233962834de0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 10 Jan 2024 10:59:21 -0500 Subject: [PATCH 455/546] add toc to docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 2f922fb1fc0..b5d420467aa 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -1,6 +1,9 @@ Globus Transfer API =================== +.. contents:: |toctitle| + :local: + The Globus API addresses three use cases: * Transfer to a Dataverse-managed Globus endpoint (File-based or using the Globus S3 Connector) From 67292840e9b6e2f701fd6bc0e09522b0b2d0ef07 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Wed, 10 Jan 2024 13:16:27 -0500 Subject: [PATCH 456/546] Add comments and makes the loop easier to understand. 
--- ...tLatestPublishedDatasetVersionCommand.java | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index a4952bbf524..dd9a8112afe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -17,33 +17,51 @@ public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; - private boolean checkPerms; + private boolean checkPermsWhenDeaccessioned; public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { this(aRequest, anAffectedDataset, false, false); } - public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) { + public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; - this.checkPerms = checkPerms; + this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned; } + /* + * This command depending on the requested parameters will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. + * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. + * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - for (DatasetVersion dsv : ds.getVersions()) { - if (dsv.isReleased() || (includeDeaccessioned && dsv.isDeaccessioned())) { - - if(dsv.isDeaccessioned() && checkPerms){ - if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ - return null; - } - } - return dsv; + DatasetVersion dsv = null; + + //We search of a released or deaccessioned version if it is requested. + for (DatasetVersion next : ds.getVersions()) { + if (next.isReleased() || (includeDeaccessioned && next.isDeaccessioned())){ + dsv = next; + break; + } + } + + //Checking permissions if the deaccessionedVersion was found and we are checking permissions because files were requested. 
+ if(dsv != null && (dsv.isDeaccessioned() && checkPermsWhenDeaccessioned)){ + //If the user has no permissions we return null + if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ + dsv = null; } } - return null; + + return dsv; } + + + } From 9d18da511af71dd4daeb1f76c330c5a25dbcca23 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 11 Jan 2024 11:01:08 +0000 Subject: [PATCH 457/546] Added: displayOrder and isRequired fields to DatasetFieldType payload --- .../harvard/iq/dataverse/util/json/JsonPrinter.java | 2 ++ .../edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index cfc266f2ba7..a97ef9c12d1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -570,6 +570,8 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("multiple", fld.isAllowMultiples()); fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary()); fieldsBld.add("displayFormat", fld.getDisplayFormat()); + fieldsBld.add("isRequired", fld.isRequired()); + fieldsBld.add("displayOrder", fld.getDisplayOrder()); if (fld.isControlledVocabulary()) { // If the field has a controlled vocabulary, // add all values to the resulting JSON diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index c301e158b4e..f1c3a9815f1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -25,7 +25,9 @@ void testGetCitationBlock() { getCitationBlock.prettyPrint(); getCitationBlock.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")); + .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")) + .body("data.fields.title.displayOrder", CoreMatchers.is(0)) + .body("data.fields.title.isRequired", CoreMatchers.is(true)); } @Test @@ -37,18 +39,18 @@ void testDatasetWithAllDefaultMetadata() { ", response=" + createUser.prettyPrint()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); assumeFalse(apiToken == null || apiToken.isBlank()); - + Response createCollection = UtilIT.createRandomDataverse(apiToken); assumeTrue(createCollection.statusCode() < 300, "code=" + createCollection.statusCode() + ", response=" + createCollection.prettyPrint()); String dataverseAlias = UtilIT.getAliasFromResponse(createCollection); assumeFalse(dataverseAlias == null || dataverseAlias.isBlank()); - + // when String pathToJsonFile = "scripts/api/data/dataset-create-new-all-default-fields.json"; Response createDataset = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); - + // then assertEquals(CREATED.getStatusCode(), createDataset.statusCode(), "code=" + createDataset.statusCode() + From e8054138219ffc499c756ee9d77bdb77d7450a23 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 11 Jan 2024 11:06:16 +0000 Subject: [PATCH 458/546] Added: release notes for #10216 --- doc/release-notes/10216-metadatablocks.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 doc/release-notes/10216-metadatablocks.md diff --git 
a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md new file mode 100644 index 00000000000..8fbd4f37e14 --- /dev/null +++ b/doc/release-notes/10216-metadatablocks.md @@ -0,0 +1,4 @@ +The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: + +- `isRequired` - Wether or not this field is required +- `displayOrder`: The display order of the field in create/edit forms From 462d8f743ba96beb39a2d30ec49eb0ee3ae9d210 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 11 Jan 2024 10:17:18 -0500 Subject: [PATCH 459/546] #10216 typo in release note --- doc/release-notes/10216-metadatablocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md index 8fbd4f37e14..b3be7e76abc 100644 --- a/doc/release-notes/10216-metadatablocks.md +++ b/doc/release-notes/10216-metadatablocks.md @@ -1,4 +1,4 @@ The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: -- `isRequired` - Wether or not this field is required +- `isRequired` - Whether or not this field is required - `displayOrder`: The display order of the field in create/edit forms From b1bb6a047cc347a6d6c97ba9f56060d3805ec545 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:35:34 -0500 Subject: [PATCH 460/546] minor doc tweaks #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index b5d420467aa..96475f33230 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -72,7 +72,7 @@ The response includes the id for the Globus endpoint to use along with several s The getDatasetMetadata and getFileListing URLs are just signed versions of the standard Dataset metadata and file listing API calls. The other two are Globus specific. -If called for a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a +If called for, a dataset using a store that is configured with a remote Globus endpoint(s), the return response is similar but the response includes a the "managed" parameter will be false, the "endpoint" parameter is replaced with a JSON array of "referenceEndpointsWithPaths" and the requestGlobusTransferPaths and addGlobusFiles URLs are replaced with ones for requestGlobusReferencePaths and addFiles. All of these calls are described further below. @@ -91,7 +91,7 @@ The returned response includes the same getDatasetMetadata and getFileListing UR Performing an Upload/Transfer In -------------------------------- -The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer or to reference files (based on the "managed" parameter). +The information from the API call above can be used to provide a user with information about the dataset and to prepare to transfer (managed=true) or to reference files (managed=false). Once the user identifies which files are to be added, the requestGlobusTransferPaths or requestGlobusReferencePaths URLs can be called. 
These both reference the same API call but must be used with different entries in the JSON body sent: From 1c3162f01cb921b21a72042ea03b1e9ca94c6da9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:49:01 -0500 Subject: [PATCH 461/546] typo #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 96475f33230..57748d0afc9 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -170,7 +170,7 @@ In the managed case, once a Globus transfer has been initiated a final API call curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" -Note that the mimetype is multipart/form-data, matching the /addFiles API call. ALso note that the API_TOKEN is not needed when using a signed URL. +Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. With this information, Dataverse will begin to monitor the transfer and when it completes, will add all files for which the transfer succeeded. As the transfer can take significant time and the API call is asynchronous, the only way to determine if the transfer succeeded via API is to use the standard calls to check the dataset lock state and contents. From 8cc2e7c0e5ba16b2f380f8fd31531e1f90271c12 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 11:56:50 -0500 Subject: [PATCH 462/546] fix path in globus endpoint docs #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 57748d0afc9..a9cfe5aedff 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -102,7 +102,7 @@ Once the user identifies which files are to be added, the requestGlobusTransferP export PERSISTENT_IDENTIFIER=doi:10.5072/FK27U7YBV export LOCALE=en-US - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUpload" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:application/json" -X POST "$SERVER_URL/api/datasets/:persistentId/requestGlobusUploadPaths" Note that when using the dataverse-globus app or the return from the previous call, the URL for this call will be signed and no API_TOKEN is needed. 
From c3556e012a03b1e131146821faabb183b1a62a87 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 12:14:24 -0500 Subject: [PATCH 463/546] add missing trailing double quote #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index a9cfe5aedff..5a90243bd93 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -168,7 +168,7 @@ In the managed case, once a Globus transfer has been initiated a final API call "files": [{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b3972213f-f6b5c2221423", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "1234"}}, \ {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"globusm://18b39722140-50eb7d3c5ece", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "MD5", "@value": "2345"}}]}' - curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles -F "jsonData=$JSON_DATA" + curl -H "X-Dataverse-key:$API_TOKEN" -H "Content-type:multipart/form-data" -X POST "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles" -F "jsonData=$JSON_DATA" Note that the mimetype is multipart/form-data, matching the /addFiles API call. Also note that the API_TOKEN is not needed when using a signed URL. From 50425d3f6e063b7f54d5a49b7bcb758f0ffde3b6 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 14:20:03 -0500 Subject: [PATCH 464/546] only list the OAI sets that have associated records #3322 --- .../harvest/server/OAISetServiceBean.java | 20 +++++++++++++++++++ .../xoai/DataverseXoaiSetRepository.java | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java index 2bd666401c7..d5c78c36b98 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java @@ -25,6 +25,7 @@ import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException; @@ -121,6 +122,25 @@ public List findAllNamedSets() { } } + /** + * "Active" sets are the ones that have been successfully exported, and contain + * a non-zero number of records. (Although a set that contains a number of + * records that are all marked as "deleted" is still an active set!) 
+ * @return list of OAISets + */ + public List findAllActiveNamedSets() { + String jpaQueryString = "select object(o) " + + "from OAISet as o, OAIRecord as r " + + "where r.setName = o.spec " + + "and o.spec != '' " + + "group by o order by o.spec"; + + Query query = em.createQuery(jpaQueryString); + List queryResults = query.getResultList(); + + return queryResults; + } + @Asynchronous public void remove(Long setId) { OAISet oaiSet = find(setId); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java index b4e275b6059..1e713b08adb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java @@ -35,7 +35,7 @@ public void setSetService(OAISetServiceBean setService) { @Override public boolean supportSets() { - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); if (dataverseOAISets == null || dataverseOAISets.isEmpty()) { return false; @@ -46,7 +46,7 @@ public boolean supportSets() { @Override public List getSets() { logger.fine("calling retrieveSets()"); - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); List XOAISets = new ArrayList(); if (dataverseOAISets != null) { From 15ad04ee96164806036a974dbe5bf41ea2a7f0fa Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 14:52:24 -0500 Subject: [PATCH 465/546] A test for the new "don't list until exported" OAI set feature (#3322) --- .../iq/dataverse/api/HarvestingServerIT.java | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index e02964ef28f..e0f121305e0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -288,7 +288,7 @@ public void testNativeSetAPI() { } @Test - public void testSetEditAPIandOAIlistSets() { + public void testSetEditAPIandOAIlistSets() throws InterruptedException { // This test focuses on testing the Edit functionality of the Dataverse // OAI Set API and the ListSets method of the Dataverse OAI server. @@ -299,7 +299,8 @@ public void testSetEditAPIandOAIlistSets() { // expected HTTP result codes. String setName = UtilIT.getRandomString(6); - String setDef = "*"; + String persistentId = extraDatasetsIdentifiers.get(0); + String setDef = "dsPersistentId:"+persistentId; // Make sure the set does not exist String setPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -369,16 +370,35 @@ public void testSetEditAPIandOAIlistSets() { XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); - // 2. Validate the payload of the response, by confirming that the set + // 2. The set hasn't been exported yet, so it shouldn't be listed in + // ListSets (#3322). Let's confirm that: + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + // 2a. 
Confirm that our set is listed: + assertNotNull(listSets, "Unexpected response from ListSets"); + assertEquals(0, listSets.size(), "An unexported OAI set is listed in ListSets"); + + // export the set: + + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); // sleep for a sec to be sure + + // ... try again: + + listSetsResponse = UtilIT.getOaiListSets(); + responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 3. Validate the payload of the response, by confirming that the set // we created and modified, above, is being listed by the OAI server // and its xml record is properly formatted - List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); - // 2a. Confirm that our set is listed: + // 3a. Confirm that our set is listed: assertNotNull(listSets, "Unexpected response from ListSets"); assertEquals(1, listSets.size(), "Newly-created set isn't properly listed by the OAI server"); - // 2b. Confirm that the set entry contains the updated description: + // 3b. Confirm that the set entry contains the updated description: assertEquals(newDescription, listSets.get(0).getPath("setDescription.metadata.element.field", String.class), "Incorrect description in the ListSets entry"); // ok, the xml record looks good! From 3a81926980edc7c8228dddf18a8f1305b32fc2c8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:40:14 -0500 Subject: [PATCH 466/546] add requestGlobusUploadPaths to UtilIT #10200 --- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e29677c2252..33dda05b4d7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -3718,4 +3718,12 @@ static Response requestGlobusDownload(Integer datasetId, JsonObject body, String .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); } + static Response requestGlobusUploadPaths(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body.toString()) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusUploadPaths"); + } + } From 83120012480ce12ef8db3d33d3a1c93c4605945a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 11 Jan 2024 15:47:17 -0500 Subject: [PATCH 467/546] clarify where taskIdentifier comes from #10200 --- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 5a90243bd93..834db8161f0 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -157,7 +157,7 @@ In the remote/reference case, the map is from the initially supplied endpoint/pa Adding Files to the Dataset --------------------------- -In the managed case, once a Globus transfer has been initiated a final API call is made to Dataverse to provide it with the task identifier of the transfer and information about the files being transferred: +In the managed case, you must initiate a 
Globus transfer and take note of its task identifier. As in the JSON example below, you will pass it as ``taskIdentifier`` along with details about the files you are transferring: .. code-block:: bash From 2f571e23c7b1b98ce530d5a87ed20c8797810175 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 11 Jan 2024 16:38:18 -0500 Subject: [PATCH 468/546] Got rid of some unnecessary database lookups that were made when rendering the harvesting server page. #3322 --- .../iq/dataverse/HarvestingSetsPage.java | 60 +++++++++++++++++-- src/main/java/propertyFiles/Bundle.properties | 2 +- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 6dbba34920b..0b66b652e0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -30,6 +30,8 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang3.StringUtils; /** @@ -430,44 +432,92 @@ public boolean isSessionUserAuthenticated() { return false; } + // The numbers of datasets and deleted/exported records below are used + // in rendering rules on the page. They absolutely need to be cached + // on the first lookup. + + Map cachedSetInfoNumDatasets = new HashMap<>(); + public int getSetInfoNumOfDatasets(OAISet oaiSet) { if (oaiSet.isDefaultSet()) { return getSetInfoNumOfExported(oaiSet); } + if (cachedSetInfoNumDatasets.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDatasets.get(oaiSet.getSpec()); + } + String query = oaiSet.getDefinition(); try { int num = oaiSetService.validateDefinitionQuery(query); if (num > -1) { + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), num); return num; } } catch (OaiSetException ose) { - // do notghin - will return zero. + // do nothing - will return zero. 
} + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), 0); return 0; } + Map cachedSetInfoNumExported = new HashMap<>(); + Integer defaultSetNumExported = null; + public int getSetInfoNumOfExported(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumExported != null) { + return defaultSetNumExported; + } else if (cachedSetInfoNumExported.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumExported.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findActiveOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumExported = num; + } else { + cachedSetInfoNumExported.put(oaiSet.getSpec(), num); + } + return num; } + Map cachedSetInfoNumDeleted = new HashMap<>(); + Integer defaultSetNumDeleted = null; + public int getSetInfoNumOfDeleted(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumDeleted != null) { + return defaultSetNumDeleted; + } else if (cachedSetInfoNumDeleted.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDeleted.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findDeletedOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumDeleted = num; + } else { + cachedSetInfoNumDeleted.put(oaiSet.getSpec(), num); + } + return num; } public void validateSetQuery() { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index ece3f070cdd..157f2ecaf54 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -631,7 +631,7 @@ harvestserver.tab.header.description=Description harvestserver.tab.header.definition=Definition Query harvestserver.tab.col.definition.default=All Published Local Datasets harvestserver.tab.header.stats=Datasets -harvestserver.tab.col.stats.empty=No records (empty set) +harvestserver.tab.col.stats.empty=No active records ({2} {2, choice, 0#records|1#record|2#records} marked as deleted) harvestserver.tab.col.stats.results={0} {0, choice, 0#datasets|1#dataset|2#datasets} ({1} {1, choice, 0#records|1#record|2#records} exported, {2} marked as deleted) harvestserver.tab.header.action=Actions harvestserver.tab.header.action.btn.export=Run Export From d86ab1587cb5088330c2df6565744769cc859119 Mon Sep 17 00:00:00 2001 From: Vera Clemens Date: Fri, 12 Jan 2024 11:36:30 +0100 Subject: [PATCH 469/546] test: use curator role in testListRoleAssignments --- scripts/api/data/role-contributor-plus.json | 12 ---------- .../harvard/iq/dataverse/api/DatasetsIT.java | 22 ++++--------------- 2 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 scripts/api/data/role-contributor-plus.json diff --git a/scripts/api/data/role-contributor-plus.json b/scripts/api/data/role-contributor-plus.json deleted file mode 100644 index ef9ba3aaff6..00000000000 --- a/scripts/api/data/role-contributor-plus.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "alias":"contributorPlus", - "name":"ContributorPlus", - "description":"For datasets, a person who can edit License + Terms, then submit them for review, and add collaborators.", - "permissions":[ - "ViewUnpublishedDataset", - "EditDataset", - "DownloadFile", - "DeleteDatasetDraft", - "ManageDatasetPermissions" - ] -} diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index b51d400d2d4..787b9b018a9 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1349,17 +1349,11 @@ public void testListRoleAssignments() { Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode()); - // We create a new role that includes "ManageDatasetPermissions" which are required for listing role assignments - // of a dataset and assign it to the contributor user + // We assign the curator role to the contributor user + // (includes "ManageDatasetPermissions" which are required for listing role assignments of a dataset, but not + // "ManageDataversePermissions") - String pathToJsonFile = "scripts/api/data/role-contributor-plus.json"; - Response addDataverseRoleResponse = UtilIT.addDataverseRole(pathToJsonFile, dataverseAlias, adminApiToken); - addDataverseRoleResponse.prettyPrint(); - String body = addDataverseRoleResponse.getBody().asString(); - String status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); - - Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "contributorPlus", "@" + contributorUsername, adminApiToken); + Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "curator", "@" + contributorUsername, adminApiToken); giveRandoPermission.prettyPrint(); assertEquals(200, giveRandoPermission.getStatusCode()); @@ -1373,14 +1367,6 @@ public void testListRoleAssignments() { notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken); assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode()); - - // Finally, we clean up and delete the role we created - - Response deleteDataverseRoleResponse = UtilIT.deleteDataverseRole("contributorPlus", adminApiToken); - deleteDataverseRoleResponse.prettyPrint(); - body = deleteDataverseRoleResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); } @Test From 5e9cc2ff4764915324ffc3c990f02e09738101c0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 12 Jan 2024 13:57:59 -0500 Subject: [PATCH 470/546] fix bad SQL query in guestbook #10232 --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index b0cc41eb448..01e6ecf7ff2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -928,7 +928,7 @@ public Long getDownloadCountByDatasetId(Long datasetId, LocalDate date) { if(date != null) { query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "' and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); }else { - query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + query 
= em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); } return (Long) query.getSingleResult(); } From d3f3eb9219fa101db8ebfea34ee62ccd3111194a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 12 Jan 2024 14:18:25 -0500 Subject: [PATCH 471/546] Update docker-compose-dev.yml better explain presence of settings #9275 --- docker-compose-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index ce9f39a418a..10fe62ff6df 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -19,7 +19,7 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" - # to get HarvestingServerIT to pass + # These two oai settings are here to get HarvestingServerIT to pass dataverse_oai_server_maxidentifiers: "2" dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 From 74b45e1d7d24b621a7368c517e687df0b21f199c Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 16 Jan 2024 10:21:42 -0500 Subject: [PATCH 472/546] QA Guide general update --- doc/sphinx-guides/source/qa/index.md | 6 +-- doc/sphinx-guides/source/qa/overview.md | 22 ++++++---- .../source/qa/performance-tests.md | 8 ++++ .../{other-approaches.md => qa-workflow.md} | 41 ++++--------------- ...{manual-testing.md => testing-approach.md} | 9 +++- 5 files changed, 42 insertions(+), 44 deletions(-) rename doc/sphinx-guides/source/qa/{other-approaches.md => qa-workflow.md} (58%) rename doc/sphinx-guides/source/qa/{manual-testing.md => testing-approach.md} (84%) diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 6027f07574f..c7582a2169f 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -3,9 +3,9 @@ ```{toctree} overview.md testing-infrastructure.md -performance-tests.md -manual-testing.md +qa-workflow.md +testing-approach.md test-automation.md -other-approaches.md jenkins.md +performance-tests.md ``` diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index c4f66446ca3..08740e9345d 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -11,19 +11,27 @@ This guide describes the testing process used by QA at IQSS and provides a refer ## Workflow -The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as issues. These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. +The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as [issues](https://github.com/IQSS/dataverse/issues). These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. -## Release Cadence and Sprints +Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. -A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. +Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. -The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. +## Tips and Tricks -## Performance Testing and Deployment +- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. +- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. +- Always tail the server.log file while testing. 
Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. +- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. +- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. -The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. +## Release Cadence and Sprints -Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. +A release likely spans multiple two-week sprints. Each sprint represents the priorities for that time and is sized so that the team can reasonably complete most of the work on time. This is a goal to help with planning, it is not a strict requirement. Some issues from the previous sprint may remain and likely be included in the next sprint but occasionally may be deprioritized and deferred to another time. + +The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. 
## Making a Release diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md index ad7972bd75e..3fab0386eb0 100644 --- a/doc/sphinx-guides/source/qa/performance-tests.md +++ b/doc/sphinx-guides/source/qa/performance-tests.md @@ -7,8 +7,16 @@ ## Introduction +The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page. + +Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page. + +## Testing Environment + To run performance tests, we have a performance test cluster on AWS that employs web, database, and Solr. The database contains a copy of production that is updated weekly on Sundays. To ensure the homepage content is consistent between test runs across releases, two scripts set the datasets that will appear on the homepage. There is a script on the web server in the default CentOS user dir and one on the database server in the default CentOS user dir. Run these scripts before conducting the tests. +Once the performance has been tested and recorded in a [Google spreadsheet](https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit?usp=sharing) for this proposed version, the release will be prepared and posted. + ## Access Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid. diff --git a/doc/sphinx-guides/source/qa/other-approaches.md b/doc/sphinx-guides/source/qa/qa-workflow.md similarity index 58% rename from doc/sphinx-guides/source/qa/other-approaches.md rename to doc/sphinx-guides/source/qa/qa-workflow.md index 2e2ef906191..78dcd1b6322 100644 --- a/doc/sphinx-guides/source/qa/other-approaches.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -1,24 +1,10 @@ -# Other Approaches to Deploying and Testing +# QA workflow for Pull Requests ```{contents} Contents: :local: :depth: 3 ``` -This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? An alternative might be to spin an EC2 branch on AWS, potentially using sample data. 
This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. - -## Tips and Tricks - -- Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. -- When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. -- When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. -- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup. - -## Workflow for Completing QA on a PR - 1. Assign the PR you are working on to yourself. 1. What does it do? @@ -98,24 +84,13 @@ This workflow is fine for a single person testing a PR, one at a time. It would 1. Merge PR - Click merge to include this PR into the common develop branch. + Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. + + Some of the reasons why we encourage using option over Rebase or Squash are: + -Preserving commit hitory + -Clearer context and treaceability + -Easier collaboration, bug tracking and reverting 1. Delete merged branch - Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. - - -## Checklist for Completing QA on a PR - -1. Build the docs -1. Smoke test the pr -1. Test the new functionality -1. Regression test -1. Test any upgrade instructions - -## Checklist for QA on Release - -1. Review Consolidated Release Notes, in particular upgrade instructions. -1. Conduct performance testing and compare with the previous release. -1. Perform clean install and smoke test. -1. Potentially follow upgrade instructions. Though they have been performed incrementally for each PR, the sequence may need checking + Just a housekeeping move if the PR is from IQSS. 
Click the delete branch button where the merge button had been. There is no deletion for outside contributions. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/manual-testing.md b/doc/sphinx-guides/source/qa/testing-approach.md similarity index 84% rename from doc/sphinx-guides/source/qa/manual-testing.md rename to doc/sphinx-guides/source/qa/testing-approach.md index 580e5153394..21039c10b1f 100644 --- a/doc/sphinx-guides/source/qa/manual-testing.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -1,4 +1,4 @@ -# Manual Testing Approach +# Testing Approach ```{contents} Contents: :local: @@ -41,3 +41,10 @@ Think about risk. Is the feature or function part of a critical area such as per 1. Upload 3 different types of files: You can use a tabular file, 50by1000.dta, an image file, and a text file. 1. Publish the dataset. 1. Download a file. + + +## Alternative deployment and testing + +This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? + +An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. \ No newline at end of file From ff044632aff9c2b98aea01da934cfbf63476dc40 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 16 Jan 2024 11:32:17 -0500 Subject: [PATCH 473/546] add release note #9926 --- doc/release-notes/9926-list-role-assignments-permissions.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/9926-list-role-assignments-permissions.md diff --git a/doc/release-notes/9926-list-role-assignments-permissions.md b/doc/release-notes/9926-list-role-assignments-permissions.md new file mode 100644 index 00000000000..43cd83dc5c9 --- /dev/null +++ b/doc/release-notes/9926-list-role-assignments-permissions.md @@ -0,0 +1 @@ +Listing collction/dataverse role assignments via API still requires ManageDataversePermissions, but listing dataset role assignments via API now requires only ManageDatasetPermissions. 
From 30e357bcfba66a2c7c2044beb4f03d88e532b96a Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 16 Jan 2024 12:37:10 -0500 Subject: [PATCH 474/546] expect noSetHierarchy rather than noRecordsMatch #9275 --- .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 45dd0c08226..ac28e7a3605 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -888,7 +888,7 @@ public void testNoSuchSetError() { noSuchSet.prettyPrint(); noSuchSet.then().assertThat() .statusCode(OK.getStatusCode()) - .body("oai.error.@code", equalTo("noRecordsMatch")) + .body("oai.error.@code", equalTo("noSetHierarchy")) .body("oai.error", equalTo("Requested set 'census' does not exist")); } From dc08219cc6f7a2b1152c0acfe67b26844daa5abe Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 16 Jan 2024 12:46:32 -0500 Subject: [PATCH 475/546] Changes after talking to Phil at 12:00 on Jan 16 --- doc/sphinx-guides/source/qa/index.md | 1 - doc/sphinx-guides/source/qa/jenkins.md | 59 ------------------- doc/sphinx-guides/source/qa/overview.md | 8 ++- doc/sphinx-guides/source/qa/qa-workflow.md | 5 +- .../source/qa/test-automation.md | 58 ++++++++++++++++-- .../source/qa/testing-approach.md | 2 +- 6 files changed, 65 insertions(+), 68 deletions(-) delete mode 100644 doc/sphinx-guides/source/qa/jenkins.md diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index c7582a2169f..937b352bccb 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -6,6 +6,5 @@ testing-infrastructure.md qa-workflow.md testing-approach.md test-automation.md -jenkins.md performance-tests.md ``` diff --git a/doc/sphinx-guides/source/qa/jenkins.md b/doc/sphinx-guides/source/qa/jenkins.md deleted file mode 100644 index 9259284beb9..00000000000 --- a/doc/sphinx-guides/source/qa/jenkins.md +++ /dev/null @@ -1,59 +0,0 @@ -# Jenkins - -```{contents} Contents: -:local: -:depth: 3 -``` - -## Introduction - -Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) - -You can find our Jenkins installation at . - -Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) - -## Jobs - -Jenkins is organized into jobs. We'll highlight a few. - -### IQSS-dataverse-develop - -, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! - -You can get to this job from the README at . - -### IQSS-Dataverse-Develop-PR - - can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". 
Clicking it will bring you to a particular build like (build #10). - -### guides.dataverse.org - - is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. - -## Checking if API Tests are Passing - -If API tests are failing, you should not merge the pull request. - -How can you know if API tests are passing? Here are the steps, by way of example. - -- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . -- Click "Test Result". -- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. -- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. - -## Diagnosing Failures - -API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. - -If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. - -Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: - -``` -TASK [dataverse : download payara zip] ***************************************** -fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} -``` - -In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 08740e9345d..01ab629db8c 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -33,6 +33,12 @@ A release likely spans multiple two-week sprints. Each sprint represents the pri The decision to make a release can be based on the time since the last release, some important feature needed by the community or contractual deadline, or some other logical reason to package the work completed into a named release and posted to the releases section on GitHub. +## Test API + +The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. + +This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. 
(See {doc}`/qa/test-automation` in the Developer Guide.) + ## Making a Release -See {doc}`/developers/making-releases` in the Developer Guide. +See {doc}`/developers/making-releases` in the Developer Guide. \ No newline at end of file diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 78dcd1b6322..df274d2405d 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -1,4 +1,4 @@ -# QA workflow for Pull Requests +# QA Workflow for Pull Requests ```{contents} Contents: :local: @@ -87,7 +87,8 @@ Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. Some of the reasons why we encourage using option over Rebase or Squash are: - -Preserving commit hitory + + -Preserving commit history -Clearer context and treaceability -Easier collaboration, bug tracking and reverting diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index c2b649df498..c996b4cea8f 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -1,15 +1,36 @@ # Test Automation - ```{contents} Contents: :local: :depth: 3 ``` -The API test suite is added to and maintained by development. (See {doc}`/developers/testing` in the Developer Guide.) It is generally advisable for code contributors to add API tests when adding new functionality. The approach here is one of code coverage: exercise as much of the code base's code paths as possible, every time to catch bugs. +## Introduction + +Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) + +You can find our Jenkins installation at . + +Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) + +## Jobs + +Jenkins is organized into jobs. We'll highlight a few. + +### IQSS-dataverse-develop -This type of approach is often used to give contributing developers confidence that their code didn’t introduce any obvious, major issues and is run on each commit. Since it is a broad set of tests, it is not clear whether any specific, conceivable test is run but it does add a lot of confidence that the code base is functioning due to its reach and consistency. +, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! -## Building and Deploying a Pull Request from Jenkins to Dataverse-Internal +You can get to this job from the README at . + +### IQSS-Dataverse-Develop-PR + + can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). + +### guides.dataverse.org + + is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. 
+ +### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal 1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. @@ -34,3 +55,32 @@ This type of approach is often used to give contributing developers confidence t 1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. 1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. + + + +## Checking if API Tests are Passing + +If API tests are failing, you should not merge the pull request. + +How can you know if API tests are passing? Here are the steps, by way of example. + +- From the pull request, navigate to the build. For example from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). +- You are now on the new "blue" interface for Jenkins. Click the button with an arrow on the right side of the header called "go to classic" which should take you to (for example) . +- Click "Test Result". +- Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. +- Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. + +## Diagnosing Failures + +API test failures can have multiple causes. As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. + +If you have determined that the API tests have not run at all, your next step should be to click on "Console Output". For example, . Click "Full log" to see the full log in the browser or navigate to (for example) to get a plain text version. + +Go to the end of the log and then scroll up, looking for the failure. A failed Ansible task can look like this: + +``` +TASK [dataverse : download payara zip] ***************************************** +fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "elapsed": 10, "msg": "Request failed: ", "url": "https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/6.2023.8/payara-6.2023.8.zip"} +``` + +In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! diff --git a/doc/sphinx-guides/source/qa/testing-approach.md b/doc/sphinx-guides/source/qa/testing-approach.md index 21039c10b1f..2c7241999a8 100644 --- a/doc/sphinx-guides/source/qa/testing-approach.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -43,7 +43,7 @@ Think about risk. Is the feature or function part of a critical area such as per 1. Download a file. -## Alternative deployment and testing +## Alternative Deployment and Testing This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. 
That might be OK, but is the environment fully configured enough to offer a real-world testing scenario?

From 95cc8cbffb79f8f91ba2e9137c2b3106e4c1f6b5 Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Tue, 16 Jan 2024 14:57:15 -0500
Subject: [PATCH 476/546] remove assertion about census not existing (doesn't appear) #9275

---
 .../java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index ac28e7a3605..60e4f623992 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -888,8 +888,7 @@ public void testNoSuchSetError() {
         noSuchSet.prettyPrint();
         noSuchSet.then().assertThat()
                 .statusCode(OK.getStatusCode())
-                .body("oai.error.@code", equalTo("noSetHierarchy"))
-                .body("oai.error", equalTo("Requested set 'census' does not exist"));
+                .body("oai.error.@code", equalTo("noSetHierarchy"));
     }
 

From edd6fc861f899b7ddb07c51fb5d900dbd0096a6c Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Tue, 16 Jan 2024 16:15:42 -0500
Subject: [PATCH 477/546] drop "no such set test" #9275

---
 .../edu/harvard/iq/dataverse/api/HarvestingServerIT.java | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
index 60e4f623992..e77853d6495 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java
@@ -882,15 +882,6 @@ public void testInvalidQueryParams() {
 
     }
 
-    @Test
-    public void testNoSuchSetError() {
-        Response noSuchSet = given().get("/oai?verb=ListIdentifiers&set=census&metadataPrefix=dc");
-        noSuchSet.prettyPrint();
-        noSuchSet.then().assertThat()
-                .statusCode(OK.getStatusCode())
-                .body("oai.error.@code", equalTo("noSetHierarchy"));
-    }
-
     // TODO:
     // What else can we test?
// Some ideas: From 2adbabb31e9206eb1518048a66f98e5853502707 Mon Sep 17 00:00:00 2001 From: GPortas Date: Wed, 17 Jan 2024 12:24:04 +0000 Subject: [PATCH 478/546] Added: typeClass field to DatasetFieldType payload --- doc/release-notes/10216-metadatablocks.md | 5 +++-- .../java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java | 1 + .../java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/10216-metadatablocks.md b/doc/release-notes/10216-metadatablocks.md index b3be7e76abc..59d9c1640a5 100644 --- a/doc/release-notes/10216-metadatablocks.md +++ b/doc/release-notes/10216-metadatablocks.md @@ -1,4 +1,5 @@ The API endpoint `/api/metadatablocks/{block_id}` has been extended to include the following fields: -- `isRequired` - Whether or not this field is required -- `displayOrder`: The display order of the field in create/edit forms +- `isRequired`: Whether or not this field is required +- `displayOrder`: The display order of the field in create/edit forms +- `typeClass`: The type class of this field ("controlledVocabulary", "compound", or "primitive") diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index a97ef9c12d1..2eaf6b64579 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -565,6 +565,7 @@ public static JsonObjectBuilder json(DatasetFieldType fld) { fieldsBld.add("displayName", fld.getDisplayName()); fieldsBld.add("title", fld.getTitle()); fieldsBld.add("type", fld.getFieldType().toString()); + fieldsBld.add("typeClass", typeClassString(fld)); fieldsBld.add("watermark", fld.getWatermark()); fieldsBld.add("description", fld.getDescription()); fieldsBld.add("multiple", fld.isAllowMultiples()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index f1c3a9815f1..39152bccad8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -27,6 +27,7 @@ void testGetCitationBlock() { .statusCode(OK.getStatusCode()) .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")) .body("data.fields.title.displayOrder", CoreMatchers.is(0)) + .body("data.fields.title.typeClass", CoreMatchers.is("primitive")) .body("data.fields.title.isRequired", CoreMatchers.is(true)); } From ebe95fdb2d81321e9de2d9e3fd3c41aacb474447 Mon Sep 17 00:00:00 2001 From: Katie Mika Date: Wed, 17 Jan 2024 11:35:33 -0500 Subject: [PATCH 479/546] Update native-api.rst Added clarification to what is affected in Set Citation Data Field Type for a Dataset --- doc/sphinx-guides/source/api/native-api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 09fc3c69693..dbe769e2fd1 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1572,8 +1572,8 @@ The fully expanded example above (without environment variables) looks like this Set Citation Date Field Type for a Dataset ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. 
-Note that the dataset citation date field type must be a date field. +Sets the dataset citation date field type for a given dataset. ``:publicationDate`` is the default. +Note that the dataset citation date field type must be a date field. This change applies to all versions of the dataset that have an entry for the new date field. It also applies to all file citations in the dataset. .. code-block:: bash From 598c40b8e5ccb2bb3db7a839e4549ac4d00ff8e1 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 17 Jan 2024 16:10:03 -0500 Subject: [PATCH 480/546] replace project 2 with 34 #9157 --- CONTRIBUTING.md | 2 +- doc/sphinx-guides/source/admin/integrations.rst | 2 +- doc/sphinx-guides/source/developers/documentation.rst | 2 +- doc/sphinx-guides/source/developers/version-control.rst | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b2be8f531c4..44f8ae65135 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,7 +56,7 @@ If you are interested in working on the main Dataverse code, great! Before you s Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request. -After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/2 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). +After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/34 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request). Thanks for your contribution! diff --git a/doc/sphinx-guides/source/admin/integrations.rst b/doc/sphinx-guides/source/admin/integrations.rst index db566106b49..cae44d42dbf 100644 --- a/doc/sphinx-guides/source/admin/integrations.rst +++ b/doc/sphinx-guides/source/admin/integrations.rst @@ -245,7 +245,7 @@ Future Integrations The `Dataverse Project Roadmap `_ is a good place to see integrations that the core Dataverse Project team is working on. -The `Community Dev `_ column of our project board is a good way to track integrations that are being worked on by the Dataverse Community but many are not listed and if you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. +If you have an idea for an integration, please ask on the `dataverse-community `_ mailing list if someone is already working on it. Many integrations take the form of "external tools". See the :doc:`external-tools` section for details. External tool makers should check out the :doc:`/api/external-tools` section of the API Guide. 
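For the external-tools pointers mentioned just above, a minimal sketch of how a tool is typically registered and listed through the admin API — the manifest filename and the localhost URL are illustrative placeholders, not part of this patch:

```bash
# Register an external tool from a manifest file (tool-manifest.json is a
# placeholder; see the external tools documentation for the manifest format).
curl -X POST -H 'Content-type: application/json' \
  http://localhost:8080/api/admin/externalTools --upload-file tool-manifest.json

# List the external tools that are currently registered.
curl http://localhost:8080/api/admin/externalTools
```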
diff --git a/doc/sphinx-guides/source/developers/documentation.rst b/doc/sphinx-guides/source/developers/documentation.rst index d07b5b63f72..4ec011f2b24 100755 --- a/doc/sphinx-guides/source/developers/documentation.rst +++ b/doc/sphinx-guides/source/developers/documentation.rst @@ -18,7 +18,7 @@ If you find a typo or a small error in the documentation you can fix it using Gi - Under the **Write** tab, delete the long welcome message and write a few words about what you fixed. - Click **Create Pull Request**. -That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/2 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors +That's it! Thank you for your contribution! Your pull request will be added manually to the main Dataverse Project board at https://github.com/orgs/IQSS/projects/34 and will go through code review and QA before it is merged into the "develop" branch. Along the way, developers might suggest changes or make them on your behalf. Once your pull request has been merged you will be listed as a contributor at https://github.com/IQSS/dataverse/graphs/contributors Please see https://github.com/IQSS/dataverse/pull/5857 for an example of a quick fix that was merged (the "Files changed" tab shows how a typo was fixed). diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index 12f3d5b81fd..c36c7d1e963 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -142,7 +142,7 @@ Feedback on the pull request template we use is welcome! Here's an example of a Make Sure Your Pull Request Has Been Advanced to Code Review ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/2. +Now that you've made your pull request, your goal is to make sure it appears in the "Code Review" column at https://github.com/orgs/IQSS/projects/34. Look at https://github.com/IQSS/dataverse/blob/master/CONTRIBUTING.md for various ways to reach out to developers who have enough access to the GitHub repo to move your issue and pull request to the "Code Review" column. From 2593310b4746fa7022d62c6955db3e69b4d03471 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 17 Jan 2024 16:13:50 -0500 Subject: [PATCH 481/546] use "Community Backlog" as "dev efforts" #9157 --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44f8ae65135..1430ba951a6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,4 +64,4 @@ Thanks for your contribution! 
[Community Call]: https://dataverse.org/community-calls [dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev [community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing -[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405 +[dev efforts]: https://github.com/orgs/IQSS/projects/34/views/6 From 4f3a6ac3c038d920b7eb687a1eae6b7871e6eba8 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 18 Jan 2024 12:43:43 -0500 Subject: [PATCH 482/546] Add fix for SQL on guestbook service bean --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 01e6ecf7ff2..04f1ebf4bd0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -914,7 +914,7 @@ public void save(GuestbookResponse guestbookResponse) { public Long getDownloadCountByDataFileId(Long dataFileId) { // datafile id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); return (Long) query.getSingleResult(); } From eb6da705e1c2dcf4e657326a09646a47bec8cb88 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Thu, 18 Jan 2024 14:11:37 -0500 Subject: [PATCH 483/546] Add fix for same issue on another query reported by Jim Myers --- .../edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index 04f1ebf4bd0..6c043b78941 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -432,7 +432,7 @@ public Long findCountByGuestbookId(Long guestbookId, Long dataverseId) { Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } else { - String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + "and o.guestbook_id = " + guestbookId; + String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + " and o.guestbook_id = " + guestbookId; Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } From 867b7dcc8244e0ea4396ef1ef0dcadec40ce6b2c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 18 Jan 2024 14:58:14 -0500 Subject: [PATCH 484/546] a better test setup (#3322) --- .../harvard/iq/dataverse/api/HarvestingServerIT.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java index 
e0f121305e0..ed9cbdaaed0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java @@ -299,8 +299,7 @@ public void testSetEditAPIandOAIlistSets() throws InterruptedException { // expected HTTP result codes. String setName = UtilIT.getRandomString(6); - String persistentId = extraDatasetsIdentifiers.get(0); - String setDef = "dsPersistentId:"+persistentId; + String setDefinition = "title:Sample"; // Make sure the set does not exist String setPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -313,20 +312,21 @@ public void testSetEditAPIandOAIlistSets() throws InterruptedException { // Create the set as admin user Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, setDef)) + .body(jsonForTestSpec(setName, setDefinition)) .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); // I. Test the Modify/Edit (POST method) functionality of the // Dataverse OAI Sets API - String newDefinition = "title:New"; + String persistentId = extraDatasetsIdentifiers.get(0); + String newDefinition = "dsPersistentId:"+persistentId; String newDescription = "updated"; // API Test 1. Try to modify the set as normal user, should fail Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, setDef, "")) + .body(jsonForEditSpec(setName, newDefinition, "")) .put(setPath); logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); assertEquals(400, editSetResponse.getStatusCode()); From 091629a6b9db2a3d1b879817a162b4309c040d15 Mon Sep 17 00:00:00 2001 From: "Balazs E. Pataki" Date: Fri, 19 Jan 2024 12:28:41 +0100 Subject: [PATCH 485/546] Add configuration for automatic XHTML/CSS/etc. reloading in IDEA in docker When running Dataverse in Docker we still want to be able to just edit things under src/main/webapp and then just reload the web page to see the changes. To do this: 1. Mapped Payara /opt/payara/appserver/glassfish/domains/domain1/applications folder to ./docker-dev-volumes/glassfish/applications 2. Added watchers.xml File watcher configuration, which can be imported into IDEA to ... 3. ... run cpwebapp.sh to copy changed files under src/main/webapp to ./docker-dev-volumes/glassfish/applications/dataverse-{current version} --- docker-compose-dev.yml | 2 ++ scripts/intellij/cpwebapp.sh | 33 +++++++++++++++++++++++++++++++++ scripts/intellij/watchers.xml | 22 ++++++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100755 scripts/intellij/cpwebapp.sh create mode 100644 scripts/intellij/watchers.xml diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 10fe62ff6df..76a4c8a745d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,6 +60,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets + # Map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). # Replace 6.0 with the current version. 
# - ./target/dataverse-6.0:/opt/payara/deployments/dataverse diff --git a/scripts/intellij/cpwebapp.sh b/scripts/intellij/cpwebapp.sh new file mode 100755 index 00000000000..6ecad367048 --- /dev/null +++ b/scripts/intellij/cpwebapp.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# +# cpwebapp +# +# Usage: +# +# Add a File watcher by importing watchers.xml into IntelliJ IDEA, and let it do the copying whenever you save a +# file under webapp. +# +# https://www.jetbrains.com/help/idea/settings-tools-file-watchers.html +# +# Alternatively, you can add an External tool and trigger via menu or shortcut to do the copying manually: +# +# https://www.jetbrains.com/help/idea/configuring-third-party-tools.html +# + +PROJECT_DIR=$1 +FILE_TO_COPY=$2 +RELATIVE_PATH="${FILE_TO_COPY#$PROJECT_DIR/}" + +# Check if RELATIVE_PATH starts with 'src/main/webapp', otherwise ignore +if [[ $RELATIVE_PATH == src/main/webapp* ]]; then + # Get current version. Any other way to do this? A simple VERSION file would help. + VERSION=`perl -ne 'print $1 if /(.*?)<\/revision>/' ./modules/dataverse-parent/pom.xml` + RELATIVE_PATH_WITHOUT_WEBAPP="${RELATIVE_PATH#src/main/webapp/}" + TARGET_DIR=./docker-dev-volumes/glassfish/applications/dataverse-$VERSION + TARGET_PATH="${TARGET_DIR}/${RELATIVE_PATH_WITHOUT_WEBAPP}" + + mkdir -p "$(dirname "$TARGET_PATH")" + cp "$FILE_TO_COPY" "$TARGET_PATH" + + echo "File $FILE_TO_COPY copied to $TARGET_PATH" +fi diff --git a/scripts/intellij/watchers.xml b/scripts/intellij/watchers.xml new file mode 100644 index 00000000000..e118fea558f --- /dev/null +++ b/scripts/intellij/watchers.xml @@ -0,0 +1,22 @@ + + + + + \ No newline at end of file From cb08667a77a2ea2a51093c81e6048ee9b5b1ef30 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Fri, 19 Jan 2024 15:10:17 -0500 Subject: [PATCH 486/546] #10249 correct typo in search API documentation --- doc/sphinx-guides/source/api/search.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst index b941064f173..e8d0a0b3ea7 100755 --- a/doc/sphinx-guides/source/api/search.rst +++ b/doc/sphinx-guides/source/api/search.rst @@ -25,7 +25,7 @@ Parameters Name Type Description =============== ======= =========== q string The search term or terms. Using "title:data" will search only the "title" field. "*" can be used as a wildcard either alone or adjacent to a term (i.e. "bird*"). For example, https://demo.dataverse.org/api/search?q=title:data . For a list of fields to search, please see https://github.com/IQSS/dataverse/issues/2558 (for now). -type string Can be either "Dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset +type string Can be either "dataverse", "dataset", or "file". Multiple "type" parameters can be used to include multiple types (i.e. ``type=dataset&type=file``). If omitted, all types will be returned. For example, https://demo.dataverse.org/api/search?q=*&type=dataset subtree string The identifier of the Dataverse collection to which the search should be narrowed. The subtree of this Dataverse collection and all its children will be searched. Multiple "subtree" parameters can be used to include multiple Dataverse collections. For example, https://demo.dataverse.org/api/search?q=data&subtree=birds&subtree=cats . sort string The sort field. 
Supported values include "name" and "date". See example under "order". order string The order in which to sort. Can either be "asc" or "desc". For example, https://demo.dataverse.org/api/search?q=data&sort=name&order=asc From fc28b37a9bdc847f04f1988f922a1414b1c70527 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Mon, 22 Jan 2024 13:17:38 -0500 Subject: [PATCH 487/546] bump google.library.version to 26.30.0 per Jim --- modules/dataverse-parent/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index e2d1ceec539..386d4934cb1 100644 --- a/modules/dataverse-parent/pom.xml +++ b/modules/dataverse-parent/pom.xml @@ -152,7 +152,7 @@ 42.6.0 9.3.0 1.12.290 - 26.29.0 + 26.30.0 8.0.0 From a28e15a9316cb1f4d726ddd0afee6cd817324c3b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 10:22:55 -0500 Subject: [PATCH 488/546] #9686 display harvesting client info on cards of harvested objects --- .../iq/dataverse/DatasetServiceBean.java | 48 ------------------- .../iq/dataverse/DvObjectServiceBean.java | 48 +++++++++++++++++++ .../search/SearchIncludeFragment.java | 41 ++++++++++------ .../harvard/iq/dataverse/api/DatasetsIT.java | 2 + 4 files changed, 76 insertions(+), 63 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index c6df2a2e1ab..4c4aafdd1ec 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -583,54 +583,6 @@ public Long getDatasetVersionCardImage(Long versionId, User user) { return null; } - /** - * Used to identify and properly display Harvested objects on the dataverse page. - * - * @param datasetIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ - if (datasetIds == null || datasetIds.size() < 1) { - return null; - } - - String datasetIdStr = StringUtils.join(datasetIds, ", "); - - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dsId; - if (result[0] != null) { - try { - dsId = (Long)result[0]; - } catch (Exception ex) { - dsId = null; - } - if (dsId == null) { - continue; - } - - ret.put(dsId, (String)result[1]); - } - } - - return ret; - } - - public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { if (datasetVersion == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index d4219c36149..58a246b364a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -383,6 +383,54 @@ public Map getObjectPathsByIds(Set objectIds){ return ret; } + /** + * Used to identify and properly display Harvested objects on the dataverse page. 
+ * + * @param dvObjectIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDvObjects(Set dvObjectIds){ + + if (dvObjectIds == null || dvObjectIds.size() < 1) { + return null; + } + + String dvObjectIsString = StringUtils.join(dvObjectIds, ", "); + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, DvObject d WHERE d.harvestingClient_id = h.id AND d.id IN (" + dvObjectIsString + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dvObjId; + if (result[0] != null) { + try { + Integer castResult = (Integer) result[0]; + dvObjId = Long.valueOf(castResult); + } catch (Exception ex) { + dvObjId = null; + } + if (dvObjId == null) { + continue; + } + ret.put(dvObjId, (String)result[1]); + } + } + + return ret; + } + + public String generateNewIdentifierByStoredProcedure() { StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 5a5d8781726..939b39b94ef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1367,6 +1367,7 @@ public boolean canPublishDataset(Long datasetId){ public void setDisplayCardValues() { Set harvestedDatasetIds = null; + Set harvestedFileIds = null; for (SolrSearchResult result : searchResultsList) { //logger.info("checking DisplayImage for the search result " + i++); if (result.getType().equals("dataverses")) { @@ -1392,10 +1393,10 @@ public void setDisplayCardValues() { } else if (result.getType().equals("files")) { result.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsBase64Url(result)); if (result.isHarvested()) { - if (harvestedDatasetIds == null) { - harvestedDatasetIds = new HashSet<>(); + if (harvestedFileIds == null) { + harvestedFileIds = new HashSet<>(); } - harvestedDatasetIds.add(result.getParentIdAsLong()); + harvestedFileIds.add(result.getEntityId()); } } } @@ -1407,25 +1408,35 @@ public void setDisplayCardValues() { // SQL query: if (harvestedDatasetIds != null) { - Map descriptionsForHarvestedDatasets = datasetService.getArchiveDescriptionsForHarvestedDatasets(harvestedDatasetIds); - if (descriptionsForHarvestedDatasets != null && descriptionsForHarvestedDatasets.size() > 0) { + Map descriptionsForHarvestedDatasets = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedDatasetIds); + if (descriptionsForHarvestedDatasets != null && !descriptionsForHarvestedDatasets.isEmpty()) { for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested()) { - if (result.getType().equals("files")) { - if (descriptionsForHarvestedDatasets.containsKey(result.getParentIdAsLong())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getParentIdAsLong())); - } - } else if (result.getType().equals("datasets")) { - if (descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); - } - } + if (result.isHarvested() && result.getType().equals("datasets") && 
descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { + result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); } } } descriptionsForHarvestedDatasets = null; harvestedDatasetIds = null; } + + if (harvestedFileIds != null) { + + Map descriptionsForHarvestedFiles = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedFileIds); + if (descriptionsForHarvestedFiles != null && !descriptionsForHarvestedFiles.isEmpty()) { + for (SolrSearchResult result : searchResultsList) { + if (result.isHarvested() && result.getType().equals("files") && descriptionsForHarvestedFiles.containsKey(result.getEntityId())) { + + result.setHarvestingDescription(descriptionsForHarvestedFiles.get(result.getEntityId())); + + } + } + } + descriptionsForHarvestedFiles = null; + harvestedDatasetIds = null; + + } + // determine which of the objects are linked: diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 9b51be4b365..087db4858b2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2548,6 +2548,8 @@ public void testLinkingDatasets() { EntityManager entityManager = entityManagerFactory.createEntityManager(); entityManager.getTransaction().begin(); // Do stuff... + //SEK 01/22/2024 - as of 6.2 harvestingclient_id will be on the dv object table + // so if this is ever implemented change will probably need to happen in the updatequery below entityManager.createNativeQuery("UPDATE dataset SET harvestingclient_id=1 WHERE id="+datasetId2).executeUpdate(); entityManager.getTransaction().commit(); entityManager.close(); From 88bae3bb295c26e7eda57d1ad5fbb34b67788542 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 10:59:46 -0500 Subject: [PATCH 489/546] #9686 fix script names --- ...emetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} | 0 ...gclient-id.sql => V6.1.0.2__9686-move-harvestingclient-id.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.3__9728-universe-variablemetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} (100%) rename src/main/resources/db/migration/{V6.1.0.1__9686-move-harvestingclient-id.sql => V6.1.0.2__9686-move-harvestingclient-id.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql rename to src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql diff --git a/src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql similarity index 100% rename from src/main/resources/db/migration/V6.1.0.1__9686-move-harvestingclient-id.sql rename to src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql From 7d27a9b64736780314ed3a203990d701db2ab399 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 23 Jan 2024 11:17:50 -0500 Subject: [PATCH 490/546] #10255 fix script name --- ...emetadata.sql => V6.1.0.1__9728-universe-variablemetadata.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/main/resources/db/migration/{V5.13.0.3__9728-universe-variablemetadata.sql => 
V6.1.0.1__9728-universe-variablemetadata.sql} (100%) diff --git a/src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql similarity index 100% rename from src/main/resources/db/migration/V5.13.0.3__9728-universe-variablemetadata.sql rename to src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql From 89b7f277ccddfc849611d7e08c16fcd3b2af3dcc Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 23 Jan 2024 13:46:16 -0500 Subject: [PATCH 491/546] Fix the issue with the thumbnail size --- src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java | 2 +- src/main/webapp/resources/css/structure.css | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index ccf861ebdc8..03a0044a987 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -464,7 +464,7 @@ public static InputStream getLogoAsInputStream(Dataset dataset) { try { in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), - ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE).getInputStream(); + ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE).getInputStream(); } catch (IOException ioex) { logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 470c07d4534..b81cf2a2c47 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -483,7 +483,7 @@ span.search-term-match {font-weight: bold;} [id$='resultsTable'] div.card-title-icon-block span.label {vertical-align:15%} [id$='resultsTable'] div.card-preview-icon-block {width:48px; float:left; margin:4px 12px 6px 0;} [id$='resultsTable'] div.card-preview-icon-block a {display:block; height:48px; line-height:48px;} -[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle;} +[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle; max-width: 64px; max-height: 48px; padding-right: 10px;} [id$='resultsTable'] div.card-preview-icon-block span[class^='icon'], [id$='resultsTable'] div.card-preview-icon-block span[class^='glyphicon'] {font-size:2.8em;} From 59690d4c9a2b5686e3b38f07c634fb32323400ff Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 09:55:46 -0500 Subject: [PATCH 492/546] emphasize need to check flyway number before merging #10101 --- .../source/developers/sql-upgrade-scripts.rst | 2 ++ doc/sphinx-guides/source/qa/qa-workflow.md | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst index bace682b1b8..4689aeec0f2 100644 --- a/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst +++ b/doc/sphinx-guides/source/developers/sql-upgrade-scripts.rst @@ -21,6 +21,8 @@ If you are creating a new database table (which maps to an ``@Entity`` in JPA), If you are doing anything other than creating a new database table such as adding a column to an existing table, you must create or update a SQL upgrade script. +.. 
_create-sql-script: + How to Create a SQL Upgrade Script ---------------------------------- diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index df274d2405d..cb047a3086a 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -27,9 +27,11 @@ Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. -1. Does it use a DB, Flyway script? +1. Does it include a database migration script (Flyway)? - Good to know since it may collide with another existing one by version or it could be a one way transform of your DB so back up your test DB before. Also, happens during deployment so be on the lookout for any issues. + First, check the numbering in the filename of the script. It must be in line with the rules defined at {ref}`create-sql-script`. If the number is out of date (very common for older pull requests), do not merge and ask the developer to rename the script. Otherwise, deployment will fail. + + Once you're sure the numbering is ok (the next available number, basically), back up your database and proceeed with testing. 1. Validate the documentation. @@ -94,4 +96,4 @@ 1. Delete merged branch - Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. \ No newline at end of file + Just a housekeeping move if the PR is from IQSS. Click the delete branch button where the merge button had been. There is no deletion for outside contributions. From 5292682d6724e1b24cb4001768ce82d97d8dc771 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 24 Jan 2024 12:05:09 -0500 Subject: [PATCH 493/546] fix for #10251 - sync terms popup required code --- .../harvard/iq/dataverse/util/FileUtil.java | 30 +++---------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 776d04e98cc..8decf74fe13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -1199,34 +1199,12 @@ public static boolean isGuestbookPopupRequired(DatasetVersion datasetVersion) { } public static boolean isTermsPopupRequired(DatasetVersion datasetVersion) { - - if (datasetVersion == null) { - logger.fine("TermsPopup not required because datasetVersion is null."); - return false; - } - //0. if version is draft then Popup "not required" - if (!datasetVersion.isReleased()) { - logger.fine("TermsPopup not required because datasetVersion has not been released."); + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if(answer == null) { + logger.fine("TermsPopup is not required."); return false; } - // 1. License and Terms of Use: - if (datasetVersion.getTermsOfUseAndAccess() != null) { - if (!License.CC0.equals(datasetVersion.getTermsOfUseAndAccess().getLicense()) - && !(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse() == null - || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { - logger.fine("TermsPopup required because of license or terms of use."); - return true; - } - - // 2. 
Terms of Access: - if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { - logger.fine("TermsPopup required because of terms of access."); - return true; - } - } - - logger.fine("TermsPopup is not required."); - return false; + return answer; } /** From 51984163525453b7360dd0b89db8746b8d55c031 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 24 Jan 2024 13:04:33 -0500 Subject: [PATCH 494/546] fix null issue found in #10251 --- .../java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index ca3f5b4bded..de3f4d2ab56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -316,7 +316,7 @@ private void redirectToDownloadAPI(String downloadType, Long fileId, boolean gue Long fileMetadataId) { String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - if (downloadType.equals("GlobusTransfer")) { + if ("GlobusTransfer".equals(downloadType)) { PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); } else { logger.fine("Redirecting to file download url: " + fileDownloadUrl); From 96f2c95a26f6bf9d153a0b95f6cea7bdac7bd4ea Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 14:40:12 -0500 Subject: [PATCH 495/546] minor tweaks #10101 --- .../source/developers/making-releases.rst | 2 ++ doc/sphinx-guides/source/qa/overview.md | 12 +++---- .../source/qa/performance-tests.md | 6 ++-- doc/sphinx-guides/source/qa/qa-workflow.md | 14 ++++---- .../source/qa/test-automation.md | 35 ++++++++++--------- .../source/qa/testing-approach.md | 14 ++++---- .../source/qa/testing-infrastructure.md | 4 +-- 7 files changed, 45 insertions(+), 42 deletions(-) diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 6b94282d55e..18ae34ee656 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -83,6 +83,8 @@ To test these images against our API test suite, go to the "alpha" workflow at h If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. +.. _build-guides: + Build the Guides for the Release -------------------------------- diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 01ab629db8c..f8eb7b19297 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -15,17 +15,17 @@ The basic workflow is as follows. Bugs or feature requests are submitted to GitH Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. -Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. 
Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Done column and the process repeats with the next PR until it is decided to {doc}`make a release `. +Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. ## Tips and Tricks - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. - When in doubt, ask someone. If you are confused about how something is working, it may be something you have missed, or it could be a documentation issue, or it could be a bug! Talk to the code reviewer and the contributor/developer for their opinion and advice. -- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you act and makes it easier to identify any stack trace on failure. -- When overloaded, do the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. -- When testing a bug fix, try reproducing the bug on the demo before testing the fix, that way you know you are taking the correct steps to verify that the fix worked. +- Always tail the server.log file while testing. Open a terminal window to the test instance and `tail -F server.log`. This helps you get a real-time sense of what the server is doing when you interact with the application and makes it easier to identify any stack trace on failure. +- When overloaded, QA the simple pull requests first to reduce the queue. It gives you a mental boost to complete something and reduces the perception of the amount of work still to be done. +- When testing a bug fix, try reproducing the bug on the demo server before testing the fix. That way you know you are taking the correct steps to verify that the fix worked. - When testing an optional feature that requires configuration, do a smoke test without the feature configured and then with it configured. That way you know that folks using the standard config are unaffected by the option if they choose not to configure it. 
-- Back up your DB before applying an irreversible DB update and you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup.
+- Back up your DB before applying an irreversible DB update when you are using a persistent/reusable platform. Just in case it fails, and you need to carry on testing something else you can use the backup.
 
 ## Release Cadence and Sprints
 
@@ -41,4 +41,4 @@ This type of approach is often used to give contributing developers confidence t
 
 ## Making a Release
 
-See {doc}`/developers/making-releases` in the Developer Guide. \ No newline at end of file
+See {doc}`/developers/making-releases` in the Developer Guide.
diff --git a/doc/sphinx-guides/source/qa/performance-tests.md b/doc/sphinx-guides/source/qa/performance-tests.md
index 3fab0386eb0..404188735a2 100644
--- a/doc/sphinx-guides/source/qa/performance-tests.md
+++ b/doc/sphinx-guides/source/qa/performance-tests.md
@@ -7,7 +7,7 @@
 
 ## Introduction
 
-The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming it is done once near the end. Using a load-generating tool named {ref}`Locust `, it loads the statistically most loaded pages, according to Google Analytics, that is 50% homepage and 50% some type of dataset page.
+The final testing activity before producing a release is performance testing. This could be done throughout the release cycle but since it is time-consuming, it is done once near the end. Using a load-generating tool named {ref}`Locust `, our scripts load the statistically most-loaded pages (according to Google Analytics): 50% homepage and 50% some type of dataset page.
 
 Since dataset page weight also varies by the number of files, a selection of about 10 datasets with varying file counts is used. The pages are called randomly as a guest user with increasing levels of user load, from 1 user to 250 users. Typical daily loads in production are around the 50-user level. Though the simulated user level does have a modest amount of random think time before repeated calls, from 5-20 seconds, it is not a real-world load so direct comparisons to production are not reliable. Instead, we compare performance to prior versions of the product, and based on how that performed in production we have some idea whether this might be similar in performance or whether there is some undetected issue that appears under load, such as inefficient or too many DB queries per page.
 
@@ -19,11 +19,11 @@ Once the performance has been tested and recorded in a [Google spreadsheet](http
 
 ## Access
 
-Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir. Access to the demo requires an ssh key, see Leonid.
+Access to performance cluster instances requires ssh keys. The cluster itself is normally not running to reduce costs. To turn on the cluster, log on to the demo server and run the perfenv scripts from the centos default user dir.
 
 ## Special Notes ⚠️
 
-Please note the performance database is also used occasionally by Julian and the Curation team to generate prod reports so a courtesy check with Julian would be good before taking over the env.
+Please note the performance database is also used occasionally by members of the Curation team to generate prod reports so a courtesy check with them would be good before taking over the env.
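For orientation, the traffic mix described above (a 50/50 split between the homepage and dataset pages, anonymous access, and 5-20 seconds of think time between requests) can be pictured with a small sketch. The real tests are driven by Locust and ramp from 1 to 250 simulated users; the snippet below is only a single-user Java illustration of the request pattern, with a placeholder host and placeholder dataset PIDs, not the actual load script.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.List;
import java.util.Random;

/**
 * Single-user illustration of the load pattern described above (not the real Locust script):
 * pick the homepage or a dataset page at random, wait 5-20 seconds, repeat.
 */
public class LoadPatternSketch {
    private static final String HOST = "https://perf.example.edu"; // placeholder for the performance cluster
    private static final List<String> DATASET_PIDS = List.of(
            "doi:10.5072/FK2/EXAMPLE1", "doi:10.5072/FK2/EXAMPLE2"); // placeholders

    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        Random random = new Random();
        for (int i = 0; i < 20; i++) {
            // 50% homepage, 50% some dataset page, as an anonymous (guest) user
            String path = random.nextBoolean()
                    ? "/"
                    : "/dataset.xhtml?persistentId=" + DATASET_PIDS.get(random.nextInt(DATASET_PIDS.size()));
            HttpRequest request = HttpRequest.newBuilder(URI.create(HOST + path)).GET().build();
            HttpResponse<Void> response = client.send(request, HttpResponse.BodyHandlers.discarding());
            System.out.println(path + " -> " + response.statusCode());
            Thread.sleep((5 + random.nextInt(16)) * 1000L); // think time: 5-20 seconds
        }
    }
}
```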
+Please note the performance database is also used occasionally by members of the Curation team to generate prod reports so a courtesy check with them would be good before taking over the env. Executing the Performance Script diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index cb047a3086a..3db17ecb8a4 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -23,9 +23,9 @@ Small changes or fixes usually don’t have docs but new features or extensions of a feature or new configuration options should have documentation. -1. Does it have or need release notes? +1. Does it have or need a release note snippet? - Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. + Same as for doc, just a heads up to an admin for something of note or especially upgrade instructions as needed. See also {ref}`writing-release-note-snippets` for what to expect in a release note snippet. 1. Does it include a database migration script (Flyway)? @@ -35,7 +35,7 @@ 1. Validate the documentation. - Build the doc using Jenkins, does it build without errors? + Build the doc using Jenkins or read the automated Read the Docs preview. Does it build without errors? Read it through for sense. Use it for test cases and to understand the feature. @@ -88,11 +88,11 @@ Click the "Merge pull request" button and be sure to use the "Create a merge commit" option to include this PR into the common develop branch. - Some of the reasons why we encourage using option over Rebase or Squash are: + Some of the reasons why we encourage using this option over Rebase or Squash are: - -Preserving commit history - -Clearer context and treaceability - -Easier collaboration, bug tracking and reverting + - Preservation of commit history + - Clearer context and treaceability + - Easier collaboration, bug tracking and reverting 1. Delete merged branch diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index c996b4cea8f..e4b3b12ec43 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -4,7 +4,7 @@ :depth: 3 ``` -## Introduction +## Jenkins Jenkins is our primary tool for knowing if our API tests are passing. (Unit tests are executed locally by developers.) @@ -12,28 +12,27 @@ You can find our Jenkins installation at . Please note that while it has been open to the public in the past, it is currently firewalled off. We can poke a hole in the firewall for your IP address if necessary. Please get in touch. (You might also be interested in which is about restoring the ability of contributors to see if their pull requests are passing API tests or not.) -## Jobs +### Jenkins Jobs Jenkins is organized into jobs. We'll highlight a few. -### IQSS-dataverse-develop +#### IQSS-dataverse-develop -, which we will refer to as the "develop" job runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! +, which we will refer to as the "develop" job, runs after pull requests are merged. It is crucial that this job stays green (passing) because we always want to stay in a "release ready" state. If you notice that this job is failing, make noise about it! -You can get to this job from the README at . +You can access this job from the README at . 
-### IQSS-Dataverse-Develop-PR +#### IQSS-Dataverse-Develop-PR can be thought of as "PR jobs". It's a collection of jobs run on pull requests. Typically, you will navigate directly into the job (and it's particular build number) from a pull request. For example, from , look for a check called "continuous-integration/jenkins/pr-merge". Clicking it will bring you to a particular build like (build #10). -### guides.dataverse.org +#### guides.dataverse.org - is what we use to build guides. See {doc}`/developers/making-releases` in the Developer Guide. + is what we use to build guides. See {ref}`build-guides` in the Developer Guide for how this job is used at release time. -### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal +#### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal - -1. Log on to GitHub, go to projects, dataverse to see Kanban board, select a pull request to test from the QA queue. +1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. 1. From the pull request page, click the copy icon next to the pull request branch name. @@ -50,15 +49,13 @@ You can get to this job from the README at . 1. Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. -1. If for some reason it didn’t deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara5/bin/asadmin list-applications; /usr/local/payara5/bin/asadmin undeploy dataverse-5.11.1; /usr/local/payara5/bin/asadmin deploy /tmp/dataverse-5.11.1.war`) +1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) -1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. 1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. - - -## Checking if API Tests are Passing +### Checking if API Tests are Passing on Jenkins If API tests are failing, you should not merge the pull request. @@ -70,7 +67,7 @@ How can you know if API tests are passing? Here are the steps, by way of example - Under "All Tests", look at the duration for "edu.harvard.iq.dataverse.api". It should be ten minutes or higher. If it was only a few seconds, tests did not run. - Assuming tests ran, if there were failures, they should appear at the top under "All Failed Tests". Inform the author of the pull request about the error. -## Diagnosing Failures +### Diagnosing Failures on Jenkins API test failures can have multiple causes. 
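As a quick supplement to eyeballing the homepage in the steps above, the running build can also be confirmed from the native info API. The sketch below is only an illustration and assumes that API is reachable on the internal test server; the hostname shown is a placeholder.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

/**
 * Minimal sketch: ask a Dataverse installation which version/build it is running,
 * via the native info API. Replace the placeholder host with the internal test server.
 */
public class CheckDeployedBuild {
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : "https://dataverse-internal.example.edu"; // placeholder
        HttpRequest request = HttpRequest.newBuilder(URI.create(host + "/api/info/version")).GET().build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // Expect something like: {"status":"OK","data":{"version":"6.1","build":"..."}}
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```

If the reported build does not match what Jenkins just deployed, it is usually the same caching problem the undeploy/redeploy step above works around.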
As described above, from the "Test Result" page, you might see the failure under "All Failed Tests". However, the test could have failed because of some underlying system issue. @@ -84,3 +81,7 @@ fatal: [localhost]: FAILED! => {"changed": false, "dest": "/tmp/payara.zip", "el ``` In the example above, if Payara can't be downloaded, we're obviously going to have problems deploying Dataverse to it! + +## GitHub Actions + +We also use GitHub Actions. See for a list of actions. diff --git a/doc/sphinx-guides/source/qa/testing-approach.md b/doc/sphinx-guides/source/qa/testing-approach.md index 2c7241999a8..817161d02a0 100644 --- a/doc/sphinx-guides/source/qa/testing-approach.md +++ b/doc/sphinx-guides/source/qa/testing-approach.md @@ -8,25 +8,25 @@ We use a risk-based, manual testing approach to achieve the most benefit with limited resources. This means we want to catch bugs where they are likely to exist, ensure core functions work, and failures do not have catastrophic results. In practice this means we do a brief positive check of core functions on each build called a smoke test, we test the most likely place for new bugs to exist, the area where things have changed, and attempt to prevent catastrophic failure by asking about the scope and reach of the code and how failures may occur. -If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by the Design group and by the community. +If it seems possible through user error or some other occurrence that such a serious failure will occur, we try to make it happen in the test environment. If the code has a UI component, we also do a limited amount of browser compatibility testing using Chrome, Firefox, and Safari browsers. We do not currently do UX or accessibility testing on a regular basis, though both have been done product-wide by a Design group (in the past) and by the community. ## Examining a Pull Request for Test Cases ### What Problem Does It Solve? -Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes, and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. +Read the top part of the pull request for a description, notes for reviewers, and usually a "how to test" section. Does it make sense? If not, read the underlying issue it closes and any release notes or documentation. Knowing in general what it does helps you to think about how to approach it. ### How is It Configured? -Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so try them out. Plus, that is the only way you will get it working to test it! +Most pull requests do not have any special configuration and are enabled on deployment, but some do. Configuration is part of testing. A sysadmin or superuser will need to follow these instructions so make sure they are in the release note snippet and try them out. Plus, that is the only way you will get it working to test it! 
-Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields are added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. +Identify test cases by examining the problem report or feature description and any documentation of functionality. Look for statements or assertions about functions, what it does, as well as conditions or conditional behavior. These become your test cases. Think about how someone might make a mistake using it and try it. Does it fail gracefully or in a confusing, or worse, damaging manner? Also, consider whether this pull request may interact with other functionality and try some spot checks there. For instance, if new metadata fields have been added, try the export feature. Of course, try the suggestions under "how to test." Those may be sufficient, but you should always think about the pull request based on what it does. -Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature but a good basic approach to keep in mind. +Try adding, modifying, and deleting any objects involved. This is probably covered by using the feature, but this is a good basic approach to keep in mind. Make sure any server logging is appropriate. You should tail the server log while running your tests. Watch for unreported errors or stack traces especially chatty logging. If you do find a bug you will need to report the stack trace from the server.log. Err on the side of providing the developer too much of server.log rather than too little. -Exercise the UI if there is one. We tend to use Chrome for most of my basic testing as it's used twice as much as the next most commonly used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. +Exercise the UI if there is one. We tend to use Chrome for most of our basic testing as it's used twice as much as the next most commonly-used browser, according to our site's Google Analytics. First go through all the options in the UI. Then, if all works, spot-check using Firefox and Safari. Check permissions. Is this feature limited to a specific set of users? Can it be accessed by a guest or by a non-privileged user? How about pasting a privileged page URL into a non-privileged user’s browser? @@ -47,4 +47,4 @@ Think about risk. Is the feature or function part of a critical area such as per This workflow is fine for a single person testing a PR, one at a time. It would be awkward or impossible if there were multiple people wanting to test different PRs at the same time. If a developer is testing, they would likely just deploy to their dev environment. That might be ok, but is the env is fully configured enough to offer a real-world testing scenario? -An alternative might be to spin an EC2 branch on AWS, potentially using sample data. 
This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. \ No newline at end of file +An alternative might be to spin an EC2 branch on AWS, potentially using sample data. This can take some time so another option might be to spin up a few, persistent AWS instances with sample data this way, one per tester, and just deploy new builds there when you want to test. You could even configure Jenkins projects for each if desired to maintain consistency in how they’re built. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index 7a4bda626fc..c099076c458 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -7,11 +7,11 @@ ## Dataverse Internal -To build and test a PR, we use a build named `IQSS_Dataverse_Internal` on , which deploys the .war file to an AWS instance named . +To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on (see {doc}`test-automation`), which deploys the .war file to an AWS instance named . ## Guides Server -There is also a guides build project named `guides.dataverse.org`. Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. +There is also a guides job called `guides.dataverse.org` (see {doc}`test-automation`). Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. Note that changes to guides can also be previewed on Read the Docs. In the pull request, look for a link like . This Read the Docs preview is also mentioned under also {doc}`/developers/documentation`. From d06ded15c9da2024f75250bcc8a25c363ae1cdc9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 14:51:57 -0500 Subject: [PATCH 496/546] move "deploy to internal" out of "test automation" #10101 --- doc/sphinx-guides/source/qa/qa-workflow.md | 2 +- .../source/qa/test-automation.md | 25 ------------------ .../source/qa/testing-infrastructure.md | 26 +++++++++++++++++++ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 3db17ecb8a4..4654a7456d2 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -41,7 +41,7 @@ 1. Build and deploy the pull request. - Normally this is done using Jenkins and automatically deployed to the QA test machine. + Normally this is done using Jenkins and automatically deployed to the QA test machine. See {ref}`deploy-to-internal`. 1. Configure if required diff --git a/doc/sphinx-guides/source/qa/test-automation.md b/doc/sphinx-guides/source/qa/test-automation.md index e4b3b12ec43..708d0f88e23 100644 --- a/doc/sphinx-guides/source/qa/test-automation.md +++ b/doc/sphinx-guides/source/qa/test-automation.md @@ -30,31 +30,6 @@ You can access this job from the README at . 
is what we use to build guides. See {ref}`build-guides` in the Developer Guide for how this job is used at release time. -#### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal - -1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. - -1. From the pull request page, click the copy icon next to the pull request branch name. - -1. Log on to , select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: - - * 8372-gdcc-xoai-library has IQSS implied - - **Repository URL:** https://github.com/IQSS/dataverse.git - - **Branch specifier:** */8372-gdcc-xoai-library - * GlobalDataverseCommunityConsortium:GDCC/DC-3B - - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git - - **Branch specifier:** */GDCC/DC-3B. - -1. Click "Build Now" and note the build number in progress. - -1. Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. - -1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) - -1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. - -1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. - ### Checking if API Tests are Passing on Jenkins If API tests are failing, you should not merge the pull request. diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index c099076c458..804e4c0afe6 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -9,6 +9,32 @@ To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on (see {doc}`test-automation`), which deploys the .war file to an AWS instance named . +(deploy-to-internal)= +### Building and Deploying a Pull Request from Jenkins to Dataverse-Internal + +1. Go to the QA column on our [project board](https://github.com/orgs/IQSS/projects/34), and select a pull request to test. + +1. From the pull request page, click the copy icon next to the pull request branch name. + +1. Log on to , select the `IQSS_Dataverse_Internal` project, and configure the repository URL and branch specifier to match the ones from the pull request. For example: + + * 8372-gdcc-xoai-library has IQSS implied + - **Repository URL:** https://github.com/IQSS/dataverse.git + - **Branch specifier:** */8372-gdcc-xoai-library + * GlobalDataverseCommunityConsortium:GDCC/DC-3B + - **Repository URL:** https://github.com/GlobalDataverseCommunityConsortium/dataverse.git + - **Branch specifier:** */GDCC/DC-3B. + +1. Click "Build Now" and note the build number in progress. + +1. 
Once complete, go to and check that the deployment succeeded, and that the homepage displays the latest build number. + +1. If for some reason it didn't deploy, check the server.log file. It may just be a caching issue so try un-deploying, deleting cache, restarting, and re-deploying on the server (`su - dataverse` then `/usr/local/payara6/bin/asadmin list-applications; /usr/local/payara6/bin/asadmin undeploy dataverse-6.1; /usr/local/payara6/bin/asadmin deploy /tmp/dataverse-6.1.war`) + +1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. + +1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. + ## Guides Server There is also a guides job called `guides.dataverse.org` (see {doc}`test-automation`). Any test builds of guides are deployed to a named directory on guides.dataverse.org and can be found and tested by going to the existing guides, removing the part of the URL that contains the version, and browsing the resulting directory listing for the latest change. From 5ffc0589c75fe2fcf2584050ae5a477ddce27e06 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:06:42 -0500 Subject: [PATCH 497/546] move testing approaches just below overview #10101 --- doc/sphinx-guides/source/qa/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/index.md b/doc/sphinx-guides/source/qa/index.md index 937b352bccb..f16cd1d38fc 100644 --- a/doc/sphinx-guides/source/qa/index.md +++ b/doc/sphinx-guides/source/qa/index.md @@ -2,9 +2,9 @@ ```{toctree} overview.md +testing-approach.md testing-infrastructure.md qa-workflow.md -testing-approach.md test-automation.md performance-tests.md ``` From 61abe519a429be60616cd61a56df4ad4f4aa52dd Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:12:01 -0500 Subject: [PATCH 498/546] minor edits #10101 --- doc/sphinx-guides/source/qa/overview.md | 2 ++ doc/sphinx-guides/source/qa/qa-workflow.md | 1 + 2 files changed, 3 insertions(+) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index f8eb7b19297..64796357831 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -17,6 +17,8 @@ Before a pull request is moved to QA, it must be reviewed by a member of the dev Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. 
+The complete suggested workflow can be found at {doc}`qa-workflow`. + ## Tips and Tricks - Start testing simply, with the most obvious test. You don’t need to know all your tests upfront. As you gain comfort and understanding of how it works, try more tests until you are done. If it is a complex feature, jot down your tests in an outline format, some beforehand as a guide, and some after as things occur to you. Save the doc in a testing folder (on Google Drive). This potentially will help with future testing. diff --git a/doc/sphinx-guides/source/qa/qa-workflow.md b/doc/sphinx-guides/source/qa/qa-workflow.md index 4654a7456d2..9915fe97d98 100644 --- a/doc/sphinx-guides/source/qa/qa-workflow.md +++ b/doc/sphinx-guides/source/qa/qa-workflow.md @@ -4,6 +4,7 @@ :local: :depth: 3 ``` +## Checklist 1. Assign the PR you are working on to yourself. From cad9e583732a568ff083999aba16941505a207f4 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 24 Jan 2024 15:20:17 -0500 Subject: [PATCH 499/546] add release note #10101 --- doc/release-notes/10101-qa-guide.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10101-qa-guide.md diff --git a/doc/release-notes/10101-qa-guide.md b/doc/release-notes/10101-qa-guide.md new file mode 100644 index 00000000000..11fbd7df2c4 --- /dev/null +++ b/doc/release-notes/10101-qa-guide.md @@ -0,0 +1 @@ +A new QA Guide is intended mostly for the core development team but may be of interest to contributors. From 743dbbc6655fd9e8bcab9db7b9df71a2fa4758db Mon Sep 17 00:00:00 2001 From: beep Date: Thu, 25 Jan 2024 08:37:24 +0100 Subject: [PATCH 500/546] Update docker-compose-dev.yml Co-authored-by: Philip Durbin --- docker-compose-dev.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 76a4c8a745d..6eab84092ed 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -60,8 +60,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets - # Map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh - - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications + # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). # Replace 6.0 with the current version. 
# - ./target/dataverse-6.0:/opt/payara/deployments/dataverse From 9d124e760bba83b7baa46bb1f88ec453a6bf6e6a Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 25 Jan 2024 11:51:12 +0000 Subject: [PATCH 501/546] Refactor: GetLatestPublishedDatasetVersionCommand --- ...tLatestPublishedDatasetVersionCommand.java | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index dd9a8112afe..9ba02ef750b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -17,7 +17,7 @@ public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand { private final Dataset ds; private final boolean includeDeaccessioned; - private boolean checkPermsWhenDeaccessioned; + private final boolean checkPermsWhenDeaccessioned; public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { this(aRequest, anAffectedDataset, false, false); @@ -31,37 +31,35 @@ public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Datase } /* - * This command depending on the requested parameters will return: - * - * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. - * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. - * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. - * - */ + * This command depending on the requested parameters will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null. + * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. + * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - - DatasetVersion dsv = null; - - //We search of a released or deaccessioned version if it is requested. - for (DatasetVersion next : ds.getVersions()) { - if (next.isReleased() || (includeDeaccessioned && next.isDeaccessioned())){ - dsv = next; - break; - } + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedVersion(); + if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { + return dsVersionResult; } + return null; + } - //Checking permissions if the deaccessionedVersion was found and we are checking permissions because files were requested. 
- if(dsv != null && (dsv.isDeaccessioned() && checkPermsWhenDeaccessioned)){ - //If the user has no permissions we return null - if(!ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset)){ - dsv = null; + private DatasetVersion getReleaseOrDeaccessionedVersion() { + for (DatasetVersion dsVersion : ds.getVersions()) { + if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { + return dsVersion; } } - - return dsv; + return null; } - - + private boolean userHasPermissionsOnDatasetVersion(DatasetVersion dsVersionResult, boolean checkPermsWhenDeaccessioned, CommandContext ctxt, Dataset ds) { + if (dsVersionResult.isDeaccessioned() && checkPermsWhenDeaccessioned) { + return ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset); + } + return true; + } } From e59907bf76553701c8d7ff16428a9cea9f132d96 Mon Sep 17 00:00:00 2001 From: GPortas Date: Thu, 25 Jan 2024 11:55:13 +0000 Subject: [PATCH 502/546] Refactor: method name --- .../command/impl/GetLatestPublishedDatasetVersionCommand.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index 9ba02ef750b..0afcbe2d0bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -40,14 +40,14 @@ public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Datase */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - DatasetVersion dsVersionResult = getReleaseOrDeaccessionedVersion(); + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedDatasetVersion(); if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { return dsVersionResult; } return null; } - private DatasetVersion getReleaseOrDeaccessionedVersion() { + private DatasetVersion getReleaseOrDeaccessionedDatasetVersion() { for (DatasetVersion dsVersion : ds.getVersions()) { if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { return dsVersion; From 252672ab68a52cd9b9d8e84b80ddb3f23df769b3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 24 Jan 2024 14:44:52 -0500 Subject: [PATCH 503/546] Proposed fix in #10220 comments --- .../iq/dataverse/ThumbnailServiceWrapper.java | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index ae81a9326c4..7f56ce0cb27 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,11 +5,14 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; - +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.SystemConfig; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ 
-170,17 +173,30 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean if (thumbnailFile == null) { - // We attempt to auto-select via the optimized, native query-based method + boolean hasDatasetLogo = false; + StorageIO storageIO = null; + try { + storageIO = DataAccess.getStorageIO(dataset); + if (!storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + // If not, return null/use the default, otherwise pass the logo URL + hasDatasetLogo = true; + } + } catch (IOException ioex) { + logger.warning("getDatasetCardImageAsUrl(): Failed to initialize dataset StorageIO for " + + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); + } + // If no other logo we attempt to auto-select via the optimized, native + // query-based method // from the DatasetVersionService: - if (datasetVersionService.getThumbnailByVersionId(versionId) == null) { + if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) { return null; } } - String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; logger.fine("getDatasetCardImageAsUrl: " + url); this.dvobjectThumbnailsMap.put(datasetId,url); return url; + } // it's the responsibility of the user - to make sure the search result From 2c989923fba155ef0fe56f46489c3eec77abb213 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 24 Jan 2024 17:10:43 -0500 Subject: [PATCH 504/546] reverse logic --- .../java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 7f56ce0cb27..b6ab23848e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -177,7 +177,7 @@ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean StorageIO storageIO = null; try { storageIO = DataAccess.getStorageIO(dataset); - if (!storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + if (storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { // If not, return null/use the default, otherwise pass the logo URL hasDatasetLogo = true; } From 77ba2932551c4a1015745ef2f911fbb5ff7c730d Mon Sep 17 00:00:00 2001 From: landreev Date: Thu, 25 Jan 2024 11:23:19 -0500 Subject: [PATCH 505/546] Revert "9686 move harvesting client" --- .../9686-move-harvesting-client-id.md | 1 - .../edu/harvard/iq/dataverse/Dataset.java | 14 ++++- .../iq/dataverse/DatasetServiceBean.java | 48 +++++++++++++++++ .../edu/harvard/iq/dataverse/DvObject.java | 17 ------ .../iq/dataverse/DvObjectServiceBean.java | 48 ----------------- .../api/imports/ImportServiceBean.java | 5 -- .../client/HarvestingClientServiceBean.java | 4 +- .../dataverse/metrics/MetricsServiceBean.java | 52 +++++++++---------- .../search/SearchIncludeFragment.java | 41 ++++++--------- ...6.1.0.2__9686-move-harvestingclient-id.sql | 14 ----- .../harvard/iq/dataverse/api/DatasetsIT.java | 2 - .../harvard/iq/dataverse/api/MetricsIT.java | 17 +++--- 12 files changed, 112 insertions(+), 151 deletions(-) delete mode 100644 doc/release-notes/9686-move-harvesting-client-id.md delete mode 100644 src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql diff --git a/doc/release-notes/9686-move-harvesting-client-id.md b/doc/release-notes/9686-move-harvesting-client-id.md deleted 
file mode 100644 index 110fcc6ca6e..00000000000 --- a/doc/release-notes/9686-move-harvesting-client-id.md +++ /dev/null @@ -1 +0,0 @@ -With this release the harvesting client id will be available for harvested files. A database update will copy the id to previously harvested files./ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index e2788e6acc6..a2f560bc959 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -752,9 +752,21 @@ public void setDatasetExternalCitations(List datasetEx this.datasetExternalCitations = datasetExternalCitations; } + @ManyToOne + @JoinColumn(name="harvestingClient_id") + private HarvestingClient harvestedFrom; - + public HarvestingClient getHarvestedFrom() { + return this.harvestedFrom; + } + public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { + this.harvestedFrom = harvestingClientConfig; + } + + public boolean isHarvested() { + return this.harvestedFrom != null; + } private String harvestIdentifier; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 4c4aafdd1ec..c6df2a2e1ab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -583,6 +583,54 @@ public Long getDatasetVersionCardImage(Long versionId, User user) { return null; } + /** + * Used to identify and properly display Harvested objects on the dataverse page. + * + * @param datasetIds + * @return + */ + public Map getArchiveDescriptionsForHarvestedDatasets(Set datasetIds){ + if (datasetIds == null || datasetIds.size() < 1) { + return null; + } + + String datasetIdStr = StringUtils.join(datasetIds, ", "); + + String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, dataset d WHERE d.harvestingClient_id = h.id AND d.id IN (" + datasetIdStr + ")"; + List searchResults; + + try { + searchResults = em.createNativeQuery(qstr).getResultList(); + } catch (Exception ex) { + searchResults = null; + } + + if (searchResults == null) { + return null; + } + + Map ret = new HashMap<>(); + + for (Object[] result : searchResults) { + Long dsId; + if (result[0] != null) { + try { + dsId = (Long)result[0]; + } catch (Exception ex) { + dsId = null; + } + if (dsId == null) { + continue; + } + + ret.put(dsId, (String)result[1]); + } + } + + return ret; + } + + public boolean isDatasetCardImageAvailable(DatasetVersion datasetVersion, User user) { if (datasetVersion == null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 46955f52878..cc5d7620969 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; @@ -372,22 +371,6 @@ public GlobalId getGlobalId() { return globalId; } - @ManyToOne - @JoinColumn(name="harvestingClient_id") - private HarvestingClient harvestedFrom; - - public HarvestingClient getHarvestedFrom() { - return this.harvestedFrom; - } - - public void setHarvestedFrom(HarvestingClient harvestingClientConfig) { - 
this.harvestedFrom = harvestingClientConfig; - } - - public boolean isHarvested() { - return this.harvestedFrom != null; - } - public abstract T accept(Visitor v); @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 58a246b364a..d4219c36149 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -383,54 +383,6 @@ public Map getObjectPathsByIds(Set objectIds){ return ret; } - /** - * Used to identify and properly display Harvested objects on the dataverse page. - * - * @param dvObjectIds - * @return - */ - public Map getArchiveDescriptionsForHarvestedDvObjects(Set dvObjectIds){ - - if (dvObjectIds == null || dvObjectIds.size() < 1) { - return null; - } - - String dvObjectIsString = StringUtils.join(dvObjectIds, ", "); - String qstr = "SELECT d.id, h.archiveDescription FROM harvestingClient h, DvObject d WHERE d.harvestingClient_id = h.id AND d.id IN (" + dvObjectIsString + ")"; - List searchResults; - - try { - searchResults = em.createNativeQuery(qstr).getResultList(); - } catch (Exception ex) { - searchResults = null; - } - - if (searchResults == null) { - return null; - } - - Map ret = new HashMap<>(); - - for (Object[] result : searchResults) { - Long dvObjId; - if (result[0] != null) { - try { - Integer castResult = (Integer) result[0]; - dvObjId = Long.valueOf(castResult); - } catch (Exception ex) { - dvObjId = null; - } - if (dvObjId == null) { - continue; - } - ret.put(dvObjId, (String)result[1]); - } - } - - return ret; - } - - public String generateNewIdentifierByStoredProcedure() { StoredProcedureQuery query = this.em.createNamedStoredProcedureQuery("Dataset.generateIdentifierFromStoredProcedure"); query.execute(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index c5812403f31..c17ba909230 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -332,11 +332,6 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString()); - //adding the harvesting client id to harvested files #9686 - for (DataFile df : ds.getFiles()){ - df.setHarvestedFrom(harvestingClient); - } - if (existingDs != null) { // If this dataset already exists IN ANOTHER DATAVERSE // we are just going to skip it! 
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 5747c64d217..7ec6d75a41c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -199,8 +199,8 @@ public void recordHarvestJobStatus(Long hcId, Date finishTime, int harvestedCoun public Long getNumberOfHarvestedDatasetsByAllClients() { try { - return (Long) em.createNativeQuery("SELECT count(d.id) FROM dvobject d " - + " WHERE d.harvestingclient_id IS NOT NULL and d.dtype = 'Dataset'").getSingleResult(); + return (Long) em.createNativeQuery("SELECT count(d.id) FROM dataset d " + + " WHERE d.harvestingclient_id IS NOT NULL").getSingleResult(); } catch (Exception ex) { logger.info("Warning: exception looking up the total number of harvested datasets: " + ex.getMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 9ae0c7cbb8f..1b5619c53e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? 
"" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -198,6 +198,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); + return (long) query.getSingleResult(); } @@ -206,17 +207,16 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // A published local datasets may have more than one released version! 
// So that's why we have to jump through some extra hoops below // in order to select the latest one: - String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" - + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" - + " from datasetversion\n" - + " join dataset on dataset.id = datasetversion.dataset_id\n" - + " join dvobject on dataset.id = dvobject.id\n" - + " where versionstate='RELEASED'\n" - + " and dvobject.harvestingclient_id is null" - + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + " group by dataset_id\n" - + "))\n"; + String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" + + "(\n" + + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + + " from datasetversion\n" + + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " where versionstate='RELEASED'\n" + + " and dataset.harvestingclient_id is null\n" + + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + " group by dataset_id\n" + + "))\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated @@ -225,7 +225,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dvobject.harvestingclient_id IS NOT null \n" + + " AND dataset.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +244,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + "JOIN dvobject ON dvobject.id = dataset.id\n" + + ((d == null) ? 
"" : "JOIN dvobject ON dvobject.id = dataset.id\n") + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +258,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +276,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +304,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +327,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +350,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 939b39b94ef..5a5d8781726 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1367,7 +1367,6 @@ public boolean canPublishDataset(Long datasetId){ public void setDisplayCardValues() { Set harvestedDatasetIds = null; - Set harvestedFileIds = null; for (SolrSearchResult result : searchResultsList) { //logger.info("checking DisplayImage for the search result " + i++); if (result.getType().equals("dataverses")) { @@ -1393,10 +1392,10 @@ public void setDisplayCardValues() { } else if (result.getType().equals("files")) { result.setImageUrl(thumbnailServiceWrapper.getFileCardImageAsBase64Url(result)); if (result.isHarvested()) { - if (harvestedFileIds == null) { - harvestedFileIds = new HashSet<>(); + if (harvestedDatasetIds == null) { + harvestedDatasetIds = new HashSet<>(); } - harvestedFileIds.add(result.getEntityId()); + harvestedDatasetIds.add(result.getParentIdAsLong()); } } } @@ -1408,35 +1407,25 @@ public void setDisplayCardValues() { // SQL query: if (harvestedDatasetIds != null) { - Map descriptionsForHarvestedDatasets = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedDatasetIds); - if (descriptionsForHarvestedDatasets != null && !descriptionsForHarvestedDatasets.isEmpty()) { + Map descriptionsForHarvestedDatasets = datasetService.getArchiveDescriptionsForHarvestedDatasets(harvestedDatasetIds); + if (descriptionsForHarvestedDatasets != null && descriptionsForHarvestedDatasets.size() > 0) { for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested() && result.getType().equals("datasets") && descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { - result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); + if (result.isHarvested()) { + if (result.getType().equals("files")) { + if (descriptionsForHarvestedDatasets.containsKey(result.getParentIdAsLong())) { + result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getParentIdAsLong())); + } + } else if (result.getType().equals("datasets")) { + if (descriptionsForHarvestedDatasets.containsKey(result.getEntityId())) { + 
result.setHarvestingDescription(descriptionsForHarvestedDatasets.get(result.getEntityId())); + } + } } } } descriptionsForHarvestedDatasets = null; harvestedDatasetIds = null; } - - if (harvestedFileIds != null) { - - Map descriptionsForHarvestedFiles = dvObjectService.getArchiveDescriptionsForHarvestedDvObjects(harvestedFileIds); - if (descriptionsForHarvestedFiles != null && !descriptionsForHarvestedFiles.isEmpty()) { - for (SolrSearchResult result : searchResultsList) { - if (result.isHarvested() && result.getType().equals("files") && descriptionsForHarvestedFiles.containsKey(result.getEntityId())) { - - result.setHarvestingDescription(descriptionsForHarvestedFiles.get(result.getEntityId())); - - } - } - } - descriptionsForHarvestedFiles = null; - harvestedDatasetIds = null; - - } - // determine which of the objects are linked: diff --git a/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql b/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql deleted file mode 100644 index 67ba026745f..00000000000 --- a/src/main/resources/db/migration/V6.1.0.2__9686-move-harvestingclient-id.sql +++ /dev/null @@ -1,14 +0,0 @@ -ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS harvestingclient_id BIGINT; - ---add harvesting client id to dvobject records of harvested datasets -update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s -where s.id = dvo.id; - ---add harvesting client id to dvobject records of harvested files -update dvobject dvo set harvestingclient_id = s.harvestingclient_id from -(select id, harvestingclient_id from dataset d where d.harvestingclient_id is not null) s -where s.id = dvo.owner_id; - -ALTER TABLE dataset drop COLUMN IF EXISTS harvestingclient_id; - diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 087db4858b2..9b51be4b365 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -2548,8 +2548,6 @@ public void testLinkingDatasets() { EntityManager entityManager = entityManagerFactory.createEntityManager(); entityManager.getTransaction().begin(); // Do stuff... 
- //SEK 01/22/2024 - as of 6.2 harvestingclient_id will be on the dv object table - // so if this is ever implemented change will probably need to happen in the updatequery below entityManager.createNativeQuery("UPDATE dataset SET harvestingclient_id=1 WHERE id="+datasetId2).executeUpdate(); entityManager.getTransaction().commit(); entityManager.close(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index 1425b7bc5d9..e3328eefb4a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -5,8 +5,6 @@ import edu.harvard.iq.dataverse.metrics.MetricsUtil; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.OK; -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -18,13 +16,10 @@ //To improve these tests we should try adding data and see if the number DOESN'T //go up to show that the caching worked public class MetricsIT { - - private static String yyyymm; @BeforeAll public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); - yyyymm = LocalDate.now().format(DateTimeFormatter.ofPattern(MetricsUtil.YEAR_AND_MONTH_PATTERN)); UtilIT.clearMetricCache(); } @@ -35,7 +30,8 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -58,7 +54,8 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -80,7 +77,8 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() @@ -102,7 +100,8 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - + String yyyymm = "2018-04"; +// yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); response.then().assertThat() From 994cf18e5c91245404830ef7e03d682c68a43538 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 25 Jan 2024 16:34:16 -0500 Subject: [PATCH 506/546] add "running Dataverse in docker", other cleanup #10238 --- doc/sphinx-guides/source/container/index.rst | 20 ++------------ doc/sphinx-guides/source/container/intro.rst | 26 ++++++++++++++++++ .../source/container/running/backend-dev.rst | 7 +++++ .../source/container/running/demo.rst | 27 +++++++++++++++++++ .../source/container/running/frontend-dev.rst | 7 +++++ .../source/container/running/index.rst | 12 +++++++++ .../container/running/metadata-blocks.rst | 9 +++++++ .../source/container/running/production.rst | 11 ++++++++ docker/compose/demo/compose.yml | 0 9 files changed, 101 insertions(+), 18 deletions(-) create mode 100644 doc/sphinx-guides/source/container/intro.rst create mode 100644 doc/sphinx-guides/source/container/running/backend-dev.rst create mode 100644 
doc/sphinx-guides/source/container/running/demo.rst create mode 100644 doc/sphinx-guides/source/container/running/frontend-dev.rst create mode 100755 doc/sphinx-guides/source/container/running/index.rst create mode 100644 doc/sphinx-guides/source/container/running/metadata-blocks.rst create mode 100644 doc/sphinx-guides/source/container/running/production.rst create mode 100644 docker/compose/demo/compose.yml diff --git a/doc/sphinx-guides/source/container/index.rst b/doc/sphinx-guides/source/container/index.rst index 4bbc87a4845..abf871dd340 100644 --- a/doc/sphinx-guides/source/container/index.rst +++ b/doc/sphinx-guides/source/container/index.rst @@ -1,28 +1,12 @@ Container Guide =============== -Running the Dataverse software in containers is quite different than in a :doc:`standard installation <../installation/prep>`. - -Both approaches have pros and cons. These days, containers are very often used for development and testing, -but there is an ever rising move toward running applications in the cloud using container technology. - -**NOTE:** -**As the Institute for Quantitative Social Sciences (IQSS) at Harvard is running a standard, non-containerized installation, -container support described in this guide is mostly created and maintained by the Dataverse community on a best-effort -basis.** - -This guide is *not* about installation on technology like Docker Swarm, Kubernetes, Rancher or other -solutions to run containers in production. There is the `Dataverse on K8s project `_ for this -purpose, as mentioned in the :doc:`/developers/containers` section of the Developer Guide. - -This guide focuses on describing the container images managed from the main Dataverse repository (again: by the -community, not IQSS), their features and limitations. Instructions on how to build the images yourself and how to -develop and extend them further are provided. - **Contents:** .. toctree:: + intro + running/index dev-usage base-image app-image diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst new file mode 100644 index 00000000000..94b2c99f0d1 --- /dev/null +++ b/doc/sphinx-guides/source/container/intro.rst @@ -0,0 +1,26 @@ +Introduction +============ + +Dataverse in containers! + +.. contents:: |toctitle| + :local: + +Intended Audience +----------------- + +This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). + +.. _getting-help-containers: + +Getting Help +------------ + +Please ask in #containers at https://chat.dataverse.org + +.. _helping-containers: + +Helping with the Containerization Effort +---------------------------------------- + +In 2023 the Containerization Working Group started meeting regularly. All are welcome to join! We talk in #containers at https://chat.dataverse.org and have a regular video call. For details, please visit https://ct.gdcc.io diff --git a/doc/sphinx-guides/source/container/running/backend-dev.rst b/doc/sphinx-guides/source/container/running/backend-dev.rst new file mode 100644 index 00000000000..45aa4450bfb --- /dev/null +++ b/doc/sphinx-guides/source/container/running/backend-dev.rst @@ -0,0 +1,7 @@ +Backend Development +=================== + +.. contents:: |toctitle| + :local: + +See :doc:`../dev-usage`. 
diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst new file mode 100644 index 00000000000..71e45f5028e --- /dev/null +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -0,0 +1,27 @@ +Demo or Evaluation +================== + +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. + +.. contents:: |toctitle| + :local: + +Hardware Requirements +--------------------- + +- 8 GB RAM + +Software Requirements +--------------------- + +- Mac, Linux, or Windows (experimental) +- Docker + +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs and see :ref:`helping-containers`. + +Steps +----- + +- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` +- Run ``docker compose up`` in the directory where you put ``compose.yml`` + diff --git a/doc/sphinx-guides/source/container/running/frontend-dev.rst b/doc/sphinx-guides/source/container/running/frontend-dev.rst new file mode 100644 index 00000000000..1f57d4531ba --- /dev/null +++ b/doc/sphinx-guides/source/container/running/frontend-dev.rst @@ -0,0 +1,7 @@ +Frontend Development +==================== + +.. contents:: |toctitle| + :local: + +https://github.com/IQSS/dataverse-frontend includes docs and scripts for running Dataverse in Docker for frontend development. diff --git a/doc/sphinx-guides/source/container/running/index.rst b/doc/sphinx-guides/source/container/running/index.rst new file mode 100755 index 00000000000..8d17b105eb4 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/index.rst @@ -0,0 +1,12 @@ +Running Dataverse in Docker +=========================== + +Contents: + +.. toctree:: + + production + demo + metadata-blocks + frontend-dev + backend-dev diff --git a/doc/sphinx-guides/source/container/running/metadata-blocks.rst b/doc/sphinx-guides/source/container/running/metadata-blocks.rst new file mode 100644 index 00000000000..4794f29ab42 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/metadata-blocks.rst @@ -0,0 +1,9 @@ +Editing Metadata Blocks +======================= + +.. contents:: |toctitle| + :local: + +The Admin Guide has a section on :doc:`/admin/metadatacustomization` and suggests running Dataverse in containers (Docker) for this purpose. + +This is certainly possible but the specifics have not yet been written. Until then, please see :doc:`demo`, which should also provide a suitable environment. diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst new file mode 100644 index 00000000000..89e63ff5ab1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -0,0 +1,11 @@ +Production (Future) +=================== + +.. contents:: |toctitle| + :local: + +The images described in this guide not yet recommended for production usage. + +You can help the effort to support these images in production by trying them out and giving feedback (see :ref:`helping-containers`). + +For now, please follow :doc:`demo`. 
diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml new file mode 100644 index 00000000000..e69de29bb2d From fb58d895edac32744cae7b164d7ae9f1121dba94 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 10:58:07 -0500 Subject: [PATCH 507/546] tweaks and more use cases #10238 --- doc/sphinx-guides/source/container/intro.rst | 2 +- .../source/container/running/backend-dev.rst | 3 +++ .../source/container/running/demo.rst | 4 ++-- .../source/container/running/frontend-dev.rst | 5 ++++- .../source/container/running/github-action.rst | 18 ++++++++++++++++++ .../source/container/running/index.rst | 1 + .../container/running/metadata-blocks.rst | 8 +++++++- .../source/container/running/production.rst | 15 ++++++++++++--- 8 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 doc/sphinx-guides/source/container/running/github-action.rst diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst index 94b2c99f0d1..42b095f3158 100644 --- a/doc/sphinx-guides/source/container/intro.rst +++ b/doc/sphinx-guides/source/container/intro.rst @@ -9,7 +9,7 @@ Dataverse in containers! Intended Audience ----------------- -This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). +This guide is intended for anyone who wants to run Dataverse in containers. This is potentially a wide audience, from sysadmins interested in running Dataverse in production in containers (not recommended yet) to contributors working on a bug fix (encouraged!). See :doc:`running/index` for various scenarios and please let us know if your use case is not covered. .. _getting-help-containers: diff --git a/doc/sphinx-guides/source/container/running/backend-dev.rst b/doc/sphinx-guides/source/container/running/backend-dev.rst index 45aa4450bfb..8b2dab956ad 100644 --- a/doc/sphinx-guides/source/container/running/backend-dev.rst +++ b/doc/sphinx-guides/source/container/running/backend-dev.rst @@ -4,4 +4,7 @@ Backend Development .. contents:: |toctitle| :local: +Intro +----- + See :doc:`../dev-usage`. diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 71e45f5028e..8db8cfb2a9c 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,7 +1,7 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, pease see :ref:`helping-containers`. .. contents:: |toctitle| :local: @@ -17,7 +17,7 @@ Software Requirements - Mac, Linux, or Windows (experimental) - Docker -Windows support is experimental but we are very interested in supporting Windows better. Please report bugs and see :ref:`helping-containers`. +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). 
Steps ----- diff --git a/doc/sphinx-guides/source/container/running/frontend-dev.rst b/doc/sphinx-guides/source/container/running/frontend-dev.rst index 1f57d4531ba..88d40c12053 100644 --- a/doc/sphinx-guides/source/container/running/frontend-dev.rst +++ b/doc/sphinx-guides/source/container/running/frontend-dev.rst @@ -4,4 +4,7 @@ Frontend Development .. contents:: |toctitle| :local: -https://github.com/IQSS/dataverse-frontend includes docs and scripts for running Dataverse in Docker for frontend development. +Intro +----- + +The frontend (web interface) of Dataverse is being decoupled from the backend. This evolving codebase has its own repo at https://github.com/IQSS/dataverse-frontend which includes docs and scripts for running the backend of Dataverse in Docker. diff --git a/doc/sphinx-guides/source/container/running/github-action.rst b/doc/sphinx-guides/source/container/running/github-action.rst new file mode 100644 index 00000000000..ae42dd494d1 --- /dev/null +++ b/doc/sphinx-guides/source/container/running/github-action.rst @@ -0,0 +1,18 @@ +GitHub Action +============= + +.. contents:: |toctitle| + :local: + +Intro +----- + +A GitHub Action is under development that will spin up a Dataverse instance within the context of GitHub CI workflows: https://github.com/gdcc/dataverse-action + +Use Cases +--------- + +Use cases for the GitHub Action include: + +- Testing :doc:`/api/client-libraries` that interact with Dataverse APIs +- Testing :doc:`/admin/integrations` of third party software with Dataverse diff --git a/doc/sphinx-guides/source/container/running/index.rst b/doc/sphinx-guides/source/container/running/index.rst index 8d17b105eb4..a02266f7cba 100755 --- a/doc/sphinx-guides/source/container/running/index.rst +++ b/doc/sphinx-guides/source/container/running/index.rst @@ -8,5 +8,6 @@ Contents: production demo metadata-blocks + github-action frontend-dev backend-dev diff --git a/doc/sphinx-guides/source/container/running/metadata-blocks.rst b/doc/sphinx-guides/source/container/running/metadata-blocks.rst index 4794f29ab42..fcc80ce1909 100644 --- a/doc/sphinx-guides/source/container/running/metadata-blocks.rst +++ b/doc/sphinx-guides/source/container/running/metadata-blocks.rst @@ -4,6 +4,12 @@ Editing Metadata Blocks .. contents:: |toctitle| :local: +Intro +----- + The Admin Guide has a section on :doc:`/admin/metadatacustomization` and suggests running Dataverse in containers (Docker) for this purpose. -This is certainly possible but the specifics have not yet been written. Until then, please see :doc:`demo`, which should also provide a suitable environment. +Status +------ + +For now, please see :doc:`demo`, which should also provide a suitable Dockerized Dataverse environment. diff --git a/doc/sphinx-guides/source/container/running/production.rst b/doc/sphinx-guides/source/container/running/production.rst index 89e63ff5ab1..0a628dc57b9 100644 --- a/doc/sphinx-guides/source/container/running/production.rst +++ b/doc/sphinx-guides/source/container/running/production.rst @@ -4,8 +4,17 @@ Production (Future) .. contents:: |toctitle| :local: -The images described in this guide not yet recommended for production usage. +Status +------ -You can help the effort to support these images in production by trying them out and giving feedback (see :ref:`helping-containers`). +The images described in this guide are not yet recommended for production usage. -For now, please follow :doc:`demo`. 
+How to Help +----------- + +You can help the effort to support these images in production by trying them out (see :doc:`demo`) and giving feedback (see :ref:`helping-containers`). + +Alternatives +------------ + +Until the images are ready for production, please use the traditional installation method described in the :doc:`/installation/index`. From b7ec6465b09e41929f985089c2a5c566e95308e4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 26 Jan 2024 11:12:50 -0500 Subject: [PATCH 508/546] #9748 delete tools only added by tests --- .../iq/dataverse/api/ExternalToolsIT.java | 102 +++++++----------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 022747a3cdc..664c07d598c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -40,21 +40,6 @@ public void testGetExternalTools() { @Test public void testFileLevelTool1() { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -145,26 +130,14 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) // No tools for this file type. .body("data", Matchers.hasSize(0)); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); } @Test public void testDatasetLevelTool1() { - // Delete all external tools before testing. 
- Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -184,7 +157,6 @@ public void testDatasetLevelTool1() { createDataset.then().assertThat() .statusCode(CREATED.getStatusCode()); -// Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); Integer datasetId = JsonPath.from(createDataset.getBody().asString()).getInt("data.id"); String datasetPid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); @@ -219,6 +191,8 @@ public void testDatasetLevelTool1() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("DatasetTool1")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getExternalToolsByDatasetIdInvalidType = UtilIT.getExternalToolsForDataset(datasetId.toString(), "invalidType", apiToken); getExternalToolsByDatasetIdInvalidType.prettyPrint(); @@ -233,27 +207,16 @@ public void testDatasetLevelTool1() { .body("data[0].scope", CoreMatchers.equalTo("dataset")) .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://datasettool1.com?datasetPid=" + datasetPid + "&key=" + apiToken)) .statusCode(OK.getStatusCode()); - + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test public void testDatasetLevelToolConfigure() { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -302,6 +265,8 @@ public void testDatasetLevelToolConfigure() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("Dataset Configurator")); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getExternalToolsByDatasetId = UtilIT.getExternalToolsForDataset(datasetId.toString(), "configure", apiToken); getExternalToolsByDatasetId.prettyPrint(); @@ -311,6 +276,11 @@ public void testDatasetLevelToolConfigure() { .body("data[0].types[0]", CoreMatchers.equalTo("configure")) .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("https://datasetconfigurator.com?datasetPid=" + datasetPid)) .statusCode(OK.getStatusCode()); + + //Delete the tool added by this test... 
+ Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @@ -400,12 +370,13 @@ public void deleteTools() { String body = getTools.getBody().asString(); JsonReader bodyObject = Json.createReader(new StringReader(body)); JsonArray tools = bodyObject.readObject().getJsonArray("data"); + /* for (int i = 0; i < tools.size(); i++) { JsonObject tool = tools.getJsonObject(i); int id = tool.getInt("id"); Response deleteExternalTool = UtilIT.deleteExternalTool(id); deleteExternalTool.prettyPrint(); - } + }*/ } // preview only @@ -446,6 +417,13 @@ public void createToolShellScript() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } // explore only @@ -479,6 +457,13 @@ public void createToolDataExplorer() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } // both preview and explore @@ -527,21 +512,6 @@ public void createToolSpreadsheetViewer() { @Test public void testFileLevelToolWithAuxFileReq() throws IOException { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -640,6 +610,12 @@ public void testFileLevelToolWithAuxFileReq() throws IOException { .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) .body("data[0].scope", CoreMatchers.equalTo("file")) .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); + } } From cc29efecd2748ad005760610c6be65ba073b35c6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 11:30:19 -0500 Subject: [PATCH 509/546] stub out demo/eval compose.yml based on dev compose #10238 Differences from dev version: - localstack and minio removed - env vars filled in based on current .env The goal is to have a single file to download, rather than a compose file and an .env file. 
--- docker/compose/demo/compose.yml | 170 ++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index e69de29bb2d..aea99040acd 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -0,0 +1,170 @@ +version: "2.4" + +services: + + dev_dataverse: + container_name: "dev_dataverse" + hostname: dataverse + image: gdcc/dataverse:unstable + restart: on-failure + user: payara + environment: + DATAVERSE_DB_HOST: postgres + DATAVERSE_DB_PASSWORD: secret + DATAVERSE_DB_USER: dataverse + ENABLE_JDWP: "1" + DATAVERSE_FEATURE_API_BEARER_AUTH: "1" + DATAVERSE_AUTH_OIDC_ENABLED: "1" + DATAVERSE_AUTH_OIDC_CLIENT_ID: test + DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 + DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test + DATAVERSE_JSF_REFRESH_PERIOD: "1" + # These two oai settings are here to get HarvestingServerIT to pass + dataverse_oai_server_maxidentifiers: "2" + dataverse_oai_server_maxrecords: "2" + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 + -Ddataverse.files.file1.type=file + -Ddataverse.files.file1.label=Filesystem + -Ddataverse.files.file1.directory=${STORAGE_DIR}/store + ports: + - "8080:8080" # HTTP (Dataverse Application) + - "4848:4848" # HTTP (Payara Admin Console) + - "9009:9009" # JDWP + - "8686:8686" # JMX + networks: + - dataverse + depends_on: + - dev_postgres + - dev_solr + - dev_dv_initializer + volumes: + - ./docker-dev-volumes/app/data:/dv + - ./docker-dev-volumes/app/secrets:/secrets + # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh + # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications + # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). + # Replace 6.0 with the current version. 
+ # - ./target/dataverse-6.0:/opt/payara/deployments/dataverse + tmpfs: + - /dumps:mode=770,size=2052M,uid=1000,gid=1000 + - /tmp:mode=770,size=2052M,uid=1000,gid=1000 + mem_limit: 2147483648 # 2 GiB + mem_reservation: 1024m + privileged: false + + dev_bootstrap: + container_name: "dev_bootstrap" + image: gdcc/configbaker:unstable + restart: "no" + command: + - bootstrap.sh + - dev + networks: + - dataverse + + dev_dv_initializer: + container_name: "dev_dv_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh dv" + volumes: + - ./docker-dev-volumes/app/data:/dv + + dev_postgres: + container_name: "dev_postgres" + hostname: postgres + image: postgres:13 + restart: on-failure + environment: + - POSTGRES_USER=dataverse + - POSTGRES_PASSWORD=secret + ports: + - "5432:5432" + networks: + - dataverse + volumes: + - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data + + dev_solr_initializer: + container_name: "dev_solr_initializer" + image: gdcc/configbaker:unstable + restart: "no" + command: + - sh + - -c + - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/solr-template + + dev_solr: + container_name: "dev_solr" + hostname: "solr" + image: solr:9.3.0 + depends_on: + - dev_solr_initializer + restart: on-failure + ports: + - "8983:8983" + networks: + - dataverse + command: + - "solr-precreate" + - "collection1" + - "/template" + volumes: + - ./docker-dev-volumes/solr/data:/var/solr + - ./docker-dev-volumes/solr/conf:/template + + dev_smtp: + container_name: "dev_smtp" + hostname: "smtp" + image: maildev/maildev:2.0.5 + restart: on-failure + ports: + - "25:25" # smtp server + - "1080:1080" # web ui + environment: + - MAILDEV_SMTP_PORT=25 + - MAILDEV_MAIL_DIRECTORY=/mail + networks: + - dataverse + #volumes: + # - ./docker-dev-volumes/smtp/data:/mail + tmpfs: + - /mail:mode=770,size=128M,uid=1000,gid=1000 + + dev_keycloak: + container_name: "dev_keycloak" + image: 'quay.io/keycloak/keycloak:21.0' + hostname: keycloak + environment: + - KEYCLOAK_ADMIN=kcadmin + - KEYCLOAK_ADMIN_PASSWORD=kcpassword + - KEYCLOAK_LOGLEVEL=DEBUG + - KC_HOSTNAME_STRICT=false + networks: + dataverse: + aliases: + - keycloak.mydomain.com #create a DNS alias within the network (add the same alias to your /etc/hosts to get a working OIDC flow) + command: start-dev --import-realm --http-port=8090 # change port to 8090, so within the network and external the same port is used + ports: + - "8090:8090" + volumes: + - './conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json' + + dev_nginx: + container_name: dev_nginx + image: gdcc/dev_nginx:unstable + ports: + - "4849:4849" + restart: always + networks: + - dataverse + +networks: + dataverse: + driver: bridge From 0c736cc698a3fef25fa8d5f25e76d4a85a6ec088 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 12:47:38 -0500 Subject: [PATCH 510/546] switch from unstable to alpha images #10238 --- docker/compose/demo/compose.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index aea99040acd..403143130ac 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -5,7 +5,7 @@ services: dev_dataverse: container_name: "dev_dataverse" hostname: dataverse - image: gdcc/dataverse:unstable + image: gdcc/dataverse:alpha restart: on-failure user: payara environment: @@ -54,7 
+54,7 @@ services: dev_bootstrap: container_name: "dev_bootstrap" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - bootstrap.sh @@ -64,7 +64,7 @@ services: dev_dv_initializer: container_name: "dev_dv_initializer" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - sh @@ -90,7 +90,7 @@ services: dev_solr_initializer: container_name: "dev_solr_initializer" - image: gdcc/configbaker:unstable + image: gdcc/configbaker:alpha restart: "no" command: - sh From 91287b35960afd0d351d1b07942333763ce84555 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 26 Jan 2024 15:55:12 -0500 Subject: [PATCH 511/546] #9748 one more assert --- src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 664c07d598c..6f0aa499dd1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -133,6 +133,8 @@ public void testFileLevelTool1() { //Delete the tool added by this test... Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From 69d3bb9172ad134c32299a326ef76efda2420458 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:21:58 -0500 Subject: [PATCH 512/546] more content for demo/eval #10238 Also update tags section under "app image" (now live). --- .../source/container/app-image.rst | 18 +-- doc/sphinx-guides/source/container/intro.rst | 2 + .../source/container/running/demo.rst | 125 ++++++++++++++++-- 3 files changed, 126 insertions(+), 19 deletions(-) diff --git a/doc/sphinx-guides/source/container/app-image.rst b/doc/sphinx-guides/source/container/app-image.rst index 29f6d6ac1d4..caf4aadbf7e 100644 --- a/doc/sphinx-guides/source/container/app-image.rst +++ b/doc/sphinx-guides/source/container/app-image.rst @@ -22,20 +22,20 @@ IQSS will not offer you support how to deploy or run it, please reach out to the You might be interested in taking a look at :doc:`../developers/containers`, linking you to some (community-based) efforts. - +.. _supported-image-tags-app: Supported Image Tags ++++++++++++++++++++ This image is sourced from the main upstream code `repository of the Dataverse software `_. -Development and maintenance of the `image's code `_ happens there -(again, by the community). - -.. note:: - Please note that this image is not (yet) available from Docker Hub. You need to build local to use - (see below). Follow https://github.com/IQSS/dataverse/issues/9444 for new developments. - - +Development and maintenance of the `image's code `_ +happens there (again, by the community). Community-supported image tags are based on the two most important +upstream branches: + +- The ``unstable`` tag corresponds to the ``develop`` branch, where pull requests are merged. + (`Dockerfile `__) +- The ``alpha`` tag corresponds to the ``master`` branch, where releases are cut from. 
+ (`Dockerfile `__) Image Contents ++++++++++++++ diff --git a/doc/sphinx-guides/source/container/intro.rst b/doc/sphinx-guides/source/container/intro.rst index 42b095f3158..5099531dcc9 100644 --- a/doc/sphinx-guides/source/container/intro.rst +++ b/doc/sphinx-guides/source/container/intro.rst @@ -18,6 +18,8 @@ Getting Help Please ask in #containers at https://chat.dataverse.org +Alternatively, you can try one or more of the channels under :ref:`support`. + .. _helping-containers: Helping with the Containerization Effort diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 8db8cfb2a9c..0ad1e50442f 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,27 +1,132 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, pease see :ref:`helping-containers`. +If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. .. contents:: |toctitle| :local: -Hardware Requirements ---------------------- +Quickstart +---------- -- 8 GB RAM +- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` +- Run ``docker compose up`` in the directory where you put ``compose.yml`` +- Visit http://localhost:8080 and try logging in: + + - username: dataverseAdmin + - password: admin1 -Software Requirements ---------------------- +Hardware and Software Requirements +----------------------------------- +- 8 GB RAM (if not much else is running) - Mac, Linux, or Windows (experimental) - Docker Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). -Steps ------ +Tags and Versions +----------------- -- Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` -- Run ``docker compose up`` in the directory where you put ``compose.yml`` +The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. + +Once Dataverse is running, you can check which version you have through the normal methods: + +- Check the bottom right in a web browser. +- Check http://localhost:8080/api/info/version via API. + +About the Containers +-------------------- + +If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: + +- dataverse +- postgres +- solr +- smtp +- bootstrap + +Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. + +Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. 
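+
+If you would like to script the version check mentioned under Tags and Versions above, the following is a minimal, illustrative Java sketch. It is not part of Dataverse itself (a browser or any plain HTTP client works just as well); it only assumes Java 11 or later and a Dataverse container reachable on ``localhost:8080``, and the endpoint path is the one given earlier in this section.
+
+.. code-block:: java
+
+    import java.net.URI;
+    import java.net.http.HttpClient;
+    import java.net.http.HttpRequest;
+    import java.net.http.HttpResponse;
+
+    public class VersionCheck {
+        public static void main(String[] args) throws Exception {
+            // Ask the running "dataverse" container which version it is serving.
+            HttpClient client = HttpClient.newHttpClient();
+            HttpRequest request = HttpRequest.newBuilder(URI.create("http://localhost:8080/api/info/version"))
+                    .GET()
+                    .build();
+            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());
+            // The body is a small JSON document that includes the version string.
+            System.out.println(response.body());
+        }
+    }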
+ +Security +-------- + +Please be aware that for now, the "dev" persona is used to bootstrap Dataverse, which means that admin APIs are wide open (to allow developers to test them; see :ref:`securing-your-installation` for more on API blocking), the "create user" key is set to a default value, etc. You can inspect the dev person `on GitHub `_ (look for ``--insecure``). + +We plan to ship a "demo" persona but it is not ready yet. See also :ref:`configbaker-personas`. + +Common Operations +----------------- + +Starting the Containers ++++++++++++++++++++++++ + +First, download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` and place it somewhere you'll remember. + +Then, run ``docker compose up`` in the directory where you put ``compose.yml`` + +Starting the containers for the first time involves a bootstrap process. You should see "have a nice day" output at the end. + +Stopping the Containers ++++++++++++++++++++++++ + +You might want to stop the containers if you aren't using them. Hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). + +You data is still intact and you can start the containers again with ``docker compose up``. + +Deleting the Containers ++++++++++++++++++++++++ + +If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. + +Deleting the Data Directory ++++++++++++++++++++++++++++ + +Data related to the Dataverse containers is placed in a directory called ``docker-dev-volumes`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. + +Configuration +------------- + +Configuration is described in greater detail under :doc:`/installation/config` in the Installation Guide, but there are some specifics to running in containers you should know about. + +.. _configbaker-personas: + +Personas +++++++++ + +When the containers are bootstrapped, the "dev" persona is used. In the future we plan to add a "demo" persona that is more suited to demo and evaluation use cases. + +Database Settings ++++++++++++++++++ + +Updating database settings is the same as described under :ref:`database-settings` in the Installation Guide. + +MPCONFIG Options +++++++++++++++++ + +The compose file contains an ``environment`` section with various MicroProfile Config (MPCONFIG) options. You can experiment with this by adding ``DATAVERSE_VERSION: foobar`` to change the (displayed) version of Dataverse to "foobar". + +JVM Options ++++++++++++ + +JVM options are not especially easy to change in the container. The general process is to get a shell on the "dataverse" container, change the settings, and then stop and start the containers. See :ref:`jvm-options` for more. + +Troubleshooting +--------------- + +Bootstrapping Did Not Complete +++++++++++++++++++++++++++++++ + +In the compose file, try increasing the timeout in the bootstrap container by adding something like this: + +.. code-block:: bash + + environment: + - TIMEOUT=10m + +Getting Help +------------ +Please do not be shy about reaching out for help. We very much want you to have a pleasant demo or evaluation experience. For ways to contact us, please see See :ref:`getting-help-containers`. 
From d3a378de0815a8d9af94fe8972f61d95841f89f2 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:23:20 -0500 Subject: [PATCH 513/546] remove limits used for harvesting tests #10238 --- docker/compose/demo/compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 403143130ac..4cfd8cd9345 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -19,9 +19,6 @@ services: DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" - # These two oai settings are here to get HarvestingServerIT to pass - dataverse_oai_server_maxidentifiers: "2" - dataverse_oai_server_maxrecords: "2" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file -Ddataverse.files.file1.label=Filesystem From 4555ae3f9dae12fd83c369b846c4aff114fecbf0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:25:59 -0500 Subject: [PATCH 514/546] remove keycloak container and OIDC config #10238 --- docker/compose/demo/compose.yml | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 4cfd8cd9345..e0839eb1023 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -14,10 +14,6 @@ services: DATAVERSE_DB_USER: dataverse ENABLE_JDWP: "1" DATAVERSE_FEATURE_API_BEARER_AUTH: "1" - DATAVERSE_AUTH_OIDC_ENABLED: "1" - DATAVERSE_AUTH_OIDC_CLIENT_ID: test - DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8 - DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test DATAVERSE_JSF_REFRESH_PERIOD: "1" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file @@ -134,25 +130,6 @@ services: tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 - dev_keycloak: - container_name: "dev_keycloak" - image: 'quay.io/keycloak/keycloak:21.0' - hostname: keycloak - environment: - - KEYCLOAK_ADMIN=kcadmin - - KEYCLOAK_ADMIN_PASSWORD=kcpassword - - KEYCLOAK_LOGLEVEL=DEBUG - - KC_HOSTNAME_STRICT=false - networks: - dataverse: - aliases: - - keycloak.mydomain.com #create a DNS alias within the network (add the same alias to your /etc/hosts to get a working OIDC flow) - command: start-dev --import-realm --http-port=8090 # change port to 8090, so within the network and external the same port is used - ports: - - "8090:8090" - volumes: - - './conf/keycloak/test-realm.json:/opt/keycloak/data/import/test-realm.json' - dev_nginx: container_name: dev_nginx image: gdcc/dev_nginx:unstable From bb4d78649338ced4f66ec4ba4167c6a94efcd23f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 26 Jan 2024 16:29:33 -0500 Subject: [PATCH 515/546] remove various dev stuff not needed for a demo #10238 --- docker/compose/demo/compose.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index e0839eb1023..b72d06951e8 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -12,9 +12,7 @@ services: DATAVERSE_DB_HOST: postgres DATAVERSE_DB_PASSWORD: secret DATAVERSE_DB_USER: dataverse - ENABLE_JDWP: "1" DATAVERSE_FEATURE_API_BEARER_AUTH: "1" - DATAVERSE_JSF_REFRESH_PERIOD: "1" JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Ddataverse.files.file1.type=file 
-Ddataverse.files.file1.label=Filesystem @@ -33,11 +31,6 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets - # Uncomment to map the glassfish applications folder so that we can update webapp resources using scripts/intellij/cpwebapp.sh - # - ./docker-dev-volumes/glassfish/applications:/opt/payara/appserver/glassfish/domains/domain1/applications - # Uncomment for changes to xhtml to be deployed immediately (if supported your IDE or toolchain). - # Replace 6.0 with the current version. - # - ./target/dataverse-6.0:/opt/payara/deployments/dataverse tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 @@ -130,15 +123,6 @@ services: tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 - dev_nginx: - container_name: dev_nginx - image: gdcc/dev_nginx:unstable - ports: - - "4849:4849" - restart: always - networks: - - dataverse - networks: dataverse: driver: bridge From c5f4ca46b6d384965c80926bce199f64f80d1af3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 29 Jan 2024 10:33:34 -0500 Subject: [PATCH 516/546] remove "dev_" from container names #10238 --- docker/compose/demo/compose.yml | 36 ++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index b72d06951e8..09dde63d5f4 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -2,8 +2,8 @@ version: "2.4" services: - dev_dataverse: - container_name: "dev_dataverse" + dataverse: + container_name: "dataverse" hostname: dataverse image: gdcc/dataverse:alpha restart: on-failure @@ -25,9 +25,9 @@ services: networks: - dataverse depends_on: - - dev_postgres - - dev_solr - - dev_dv_initializer + - postgres + - solr + - dv_initializer volumes: - ./docker-dev-volumes/app/data:/dv - ./docker-dev-volumes/app/secrets:/secrets @@ -38,8 +38,8 @@ services: mem_reservation: 1024m privileged: false - dev_bootstrap: - container_name: "dev_bootstrap" + bootstrap: + container_name: "bootstrap" image: gdcc/configbaker:alpha restart: "no" command: @@ -48,8 +48,8 @@ services: networks: - dataverse - dev_dv_initializer: - container_name: "dev_dv_initializer" + dv_initializer: + container_name: "dv_initializer" image: gdcc/configbaker:alpha restart: "no" command: @@ -59,8 +59,8 @@ services: volumes: - ./docker-dev-volumes/app/data:/dv - dev_postgres: - container_name: "dev_postgres" + postgres: + container_name: "postgres" hostname: postgres image: postgres:13 restart: on-failure @@ -74,8 +74,8 @@ services: volumes: - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data - dev_solr_initializer: - container_name: "dev_solr_initializer" + solr_initializer: + container_name: "solr_initializer" image: gdcc/configbaker:alpha restart: "no" command: @@ -86,12 +86,12 @@ services: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/solr-template - dev_solr: - container_name: "dev_solr" + solr: + container_name: "solr" hostname: "solr" image: solr:9.3.0 depends_on: - - dev_solr_initializer + - solr_initializer restart: on-failure ports: - "8983:8983" @@ -105,8 +105,8 @@ services: - ./docker-dev-volumes/solr/data:/var/solr - ./docker-dev-volumes/solr/conf:/template - dev_smtp: - container_name: "dev_smtp" + smtp: + container_name: "smtp" hostname: "smtp" image: maildev/maildev:2.0.5 restart: on-failure From c0cda028c3ce0922f51c670917d94ef22cab61c5 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 29 Jan 
2024 10:39:14 -0500 Subject: [PATCH 517/546] rename docker-dev-volumes to data #10238 --- .../source/container/running/demo.rst | 2 +- docker/compose/demo/.gitignore | 1 + docker/compose/demo/compose.yml | 18 +++++++++--------- 3 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 docker/compose/demo/.gitignore diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 0ad1e50442f..5eda108c842 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -84,7 +84,7 @@ If you no longer need the containers because your demo or evaluation is finished Deleting the Data Directory +++++++++++++++++++++++++++ -Data related to the Dataverse containers is placed in a directory called ``docker-dev-volumes`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. +Data related to the Dataverse containers is placed in a directory called ``data`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. Configuration ------------- diff --git a/docker/compose/demo/.gitignore b/docker/compose/demo/.gitignore new file mode 100644 index 00000000000..1269488f7fb --- /dev/null +++ b/docker/compose/demo/.gitignore @@ -0,0 +1 @@ +data diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 09dde63d5f4..3817921f10a 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -29,8 +29,8 @@ services: - solr - dv_initializer volumes: - - ./docker-dev-volumes/app/data:/dv - - ./docker-dev-volumes/app/secrets:/secrets + - ./data/app/data:/dv + - ./data/app/secrets:/secrets tmpfs: - /dumps:mode=770,size=2052M,uid=1000,gid=1000 - /tmp:mode=770,size=2052M,uid=1000,gid=1000 @@ -57,7 +57,7 @@ services: - -c - "fix-fs-perms.sh dv" volumes: - - ./docker-dev-volumes/app/data:/dv + - ./data/app/data:/dv postgres: container_name: "postgres" @@ -72,7 +72,7 @@ services: networks: - dataverse volumes: - - ./docker-dev-volumes/postgresql/data:/var/lib/postgresql/data + - ./data/postgresql/data:/var/lib/postgresql/data solr_initializer: container_name: "solr_initializer" @@ -83,8 +83,8 @@ services: - -c - "fix-fs-perms.sh solr && cp -a /template/* /solr-template" volumes: - - ./docker-dev-volumes/solr/data:/var/solr - - ./docker-dev-volumes/solr/conf:/solr-template + - ./data/solr/data:/var/solr + - ./data/solr/conf:/solr-template solr: container_name: "solr" @@ -102,8 +102,8 @@ services: - "collection1" - "/template" volumes: - - ./docker-dev-volumes/solr/data:/var/solr - - ./docker-dev-volumes/solr/conf:/template + - ./data/solr/data:/var/solr + - ./data/solr/conf:/template smtp: container_name: "smtp" @@ -119,7 +119,7 @@ services: networks: - dataverse #volumes: - # - ./docker-dev-volumes/smtp/data:/mail + # - ./data/smtp/data:/mail tmpfs: - /mail:mode=770,size=128M,uid=1000,gid=1000 From d275a6343c0b7d0b296e8dc2d3c158afdd980058 Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 29 Jan 2024 10:42:23 -0500 Subject: [PATCH 518/546] Add TurboCurator to External Tools list --- .../source/_static/admin/dataverse-external-tools.tsv | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 
4f4c29d0670..a20ab864d2a 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,3 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." From 1ea4db3f3c011dc8ea28d9eb656e423fdccfccd9 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 30 Jan 2024 13:02:34 -0500 Subject: [PATCH 519/546] a checklist for making a core field allowMultiples for the dev. guide #9634 --- doc/sphinx-guides/source/developers/index.rst | 1 + .../source/developers/metadatablocksdev.rst | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 doc/sphinx-guides/source/developers/metadatablocksdev.rst diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 25fea138736..25007baf589 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -31,6 +31,7 @@ Developer Guide making-releases making-library-releases metadataexport + metadatablocksdev tools unf/index make-data-count diff --git a/doc/sphinx-guides/source/developers/metadatablocksdev.rst b/doc/sphinx-guides/source/developers/metadatablocksdev.rst new file mode 100644 index 00000000000..17093471467 --- /dev/null +++ b/doc/sphinx-guides/source/developers/metadatablocksdev.rst @@ -0,0 +1,26 @@ +=========================== +Metadata Blocks Development +=========================== + +.. contents:: |toctitle| + :local: + +Introduction +------------ + +The idea behind Metadata Blocks in Dataverse is to have everything about the supported metadata fields configurable and customizable. Ideally, this should be accomplished by simply re-importing the updated tsv for the block via the API. 
In practice, when it comes to the core blocks that are distributed with Dataverse - such as the Citation and Social Science blocks - unfortunately, many dependencies exist in various parts of Dataverse, primarily import and export subsystems, on many specific fields being configured a certain way. This means that code changes may be required whenever a field from one of these core blocks is modified. + +Making a Field Multiple +----------------------- + +Back in 2023, in order to accommodate specific needs of some community member institutions a few fields from Citation and Social Science were changed to support multiple values. (For example, the ``alternativeTitle`` field from the Citation block.) A number of code changes had to be made to accommodate this, plus a number of changes in the sample metadata files that are maintained in the Dataverse code tree. The checklist below is to help another developer should a similar change become necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. + +- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block (obviously). +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Modify the DDI export utility (``DdiExportUtil.java``). +- Modify the OpenAire export utility (``OpenAireExportUtil.java``). +- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). +- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. +- Make sure all the automated Unit and Integration tests are passing. +- Write a short release note to announce the change in the upcoming release. 
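The checklist above assumes that, once the TSV and the Solr schema are edited, the updated block is re-imported via the API as described in the introduction. A minimal sketch of that step, assuming a local installation listening on localhost:8080 and the citation block edited in place:

.. code-block:: bash

    # Sketch only: reload the edited block TSV through the admin API
    curl http://localhost:8080/api/admin/datasetfield/load -X POST \
         -H "Content-type: text/tab-separated-values" \
         --upload-file scripts/api/data/metadatablocks/citation.tsv

    # After the updated Solr schema is deployed, start a full reindex
    curl http://localhost:8080/api/admin/index
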
From 2eeda3d910ed128176c75b290c651252722dd919 Mon Sep 17 00:00:00 2001 From: Don Sizemore Date: Tue, 30 Jan 2024 13:08:58 -0500 Subject: [PATCH 520/546] add sleep to SwordIT per qqmyers --- src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java index 39156f1c59b..4df6c89411d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java @@ -855,7 +855,7 @@ public void testDeleteFiles() { List oneFileLeftInV2Draft = statement3.getBody().xmlPath().getList("feed.entry.id"); logger.info("Number of files remaining in this post version 1 draft:" + oneFileLeftInV2Draft.size()); assertEquals(1, oneFileLeftInV2Draft.size()); - + UtilIT.sleepForLock(datasetPersistentId, "EditInProgress", apiToken, UtilIT.MAXIMUM_PUBLISH_LOCK_DURATION); Response deleteIndex1b = UtilIT.deleteFile(Integer.parseInt(index1b), apiToken); deleteIndex1b.then().assertThat() .statusCode(NO_CONTENT.getStatusCode()); From e4776101e8507a4b470b58ec70e90046516e4fa4 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 30 Jan 2024 13:16:11 -0500 Subject: [PATCH 521/546] linked the dev. checklist in the metadata customization section of the admin guide. #9634 --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 4f737bd730b..36956567a7d 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -648,6 +648,11 @@ Alternatively, you are welcome to request "edit" access to this "Tips for Datave The thinking is that the tips can become issues and the issues can eventually be worked on as features to improve the Dataverse Software metadata system. +Development Tasks Specific to Changing Fields in Core Metadata Blocks +--------------------------------------------------------------------- + +When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation and Social Science blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields (a real life example is making a single value-only field support multiple values), code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. An example of a checklist of such tasks is provided in the Development Guide, please see the :doc:`/developers/metadatablocksdev` section. 
+ Footnotes --------- From d960b980f926ba3e1d8ed0336ef3d541ddc6fb50 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 30 Jan 2024 16:01:55 -0500 Subject: [PATCH 522/546] #9748 comment out disabled test --- src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 6f0aa499dd1..2c96ce96dea 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -432,6 +432,7 @@ public void createToolShellScript() { @Disabled @Test public void createToolDataExplorer() { + /* JsonObjectBuilder job = Json.createObjectBuilder(); job.add("displayName", "Data Explorer"); job.add("description", ""); @@ -466,6 +467,7 @@ public void createToolDataExplorer() { Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); deleteExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + */ } // both preview and explore From 9b0a3cf2f0c5a6337aaed925ff640651fecf6116 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 30 Jan 2024 16:50:07 -0500 Subject: [PATCH 523/546] rewrite demo page as a tutorial #10238 Also, explain how to create a persona and some basic config. --- .../source/container/running/demo.rst | 169 +++++++++++------- docker/compose/demo/compose.yml | 4 + .../scripts/bootstrap/demo/init.sh | 13 ++ 3 files changed, 126 insertions(+), 60 deletions(-) create mode 100644 modules/container-configbaker/scripts/bootstrap/demo/init.sh diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 5eda108c842..4e2a9db3f48 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -1,7 +1,7 @@ Demo or Evaluation ================== -If you would like to demo or evaluate Dataverse running in containers, you're in the right place. Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. +In the following tutorial we'll walk through spinning up Dataverse in containers for demo or evaluation purposes. .. contents:: |toctitle| :local: @@ -9,6 +9,8 @@ If you would like to demo or evaluate Dataverse running in containers, you're in Quickstart ---------- +First, let's confirm that we can get Dataverse running on your system. + - Download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` - Run ``docker compose up`` in the directory where you put ``compose.yml`` - Visit http://localhost:8080 and try logging in: @@ -16,106 +18,138 @@ Quickstart - username: dataverseAdmin - password: admin1 -Hardware and Software Requirements ------------------------------------ +If you can log in, great! Please continue through the tutorial. If you have any trouble, please consult the sections below on troubleshooting and getting help. -- 8 GB RAM (if not much else is running) -- Mac, Linux, or Windows (experimental) -- Docker +Stopping and Starting the Containers +------------------------------------ -Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). +Let's practice stopping the containers and starting them up again. 
Your data, stored in a directory called ``data``, will remain intact -Tags and Versions ------------------ +To stop the containers hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). -The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. +To start the containers, run ``docker compose up``. -Once Dataverse is running, you can check which version you have through the normal methods: +Deleting Data and Starting Over +------------------------------- -- Check the bottom right in a web browser. -- Check http://localhost:8080/api/info/version via API. +Again, data related to your Dataverse installation such as the database is stored in a directory called ``data`` that gets created in the directory where you ran ``docker compose`` commands. -About the Containers --------------------- +You may reach a point during your demo or evaluation that you'd like to start over with a fresh database. Simply make sure the containers are not running and then remove the ``data`` directory. Now, as before, you can run ``docker compose up`` to spin up the containers. -If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: +Configuring Dataverse +--------------------- -- dataverse -- postgres -- solr -- smtp -- bootstrap +Now that you are familiar with the basics of running Dataverse in containers, let's move on to configuration. -Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. +Start Fresh ++++++++++++ -Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. +For this configuration exercise, please start fresh by stopping all containers and removing the ``data`` directory. -Security --------- +Change the Site URL ++++++++++++++++++++ -Please be aware that for now, the "dev" persona is used to bootstrap Dataverse, which means that admin APIs are wide open (to allow developers to test them; see :ref:`securing-your-installation` for more on API blocking), the "create user" key is set to a default value, etc. You can inspect the dev person `on GitHub `_ (look for ``--insecure``). +Edit ``compose.yml`` and change ``_CT_DATAVERSE_SITEURL`` to the URL you plan to use for your installation. -We plan to ship a "demo" persona but it is not ready yet. See also :ref:`configbaker-personas`. +(You can read more about this setting at :ref:`dataverse.siteUrl`.) -Common Operations ------------------ +This is an example of setting an environment variable to configure Dataverse. -Starting the Containers -+++++++++++++++++++++++ +Create and Run a Demo Persona ++++++++++++++++++++++++++++++ -First, download :download:`compose.yml <../../../../../docker/compose/demo/compose.yml>` and place it somewhere you'll remember. 
+Previously we used the "dev" persona to bootstrap Dataverse, but for security reasons, we should create a persona more suited to demos and evaluations. -Then, run ``docker compose up`` in the directory where you put ``compose.yml`` +Edit the ``compose.yml`` file and look for the following section. -Starting the containers for the first time involves a bootstrap process. You should see "have a nice day" output at the end. +.. code-block:: bash -Stopping the Containers -+++++++++++++++++++++++ + bootstrap: + container_name: "bootstrap" + image: gdcc/configbaker:alpha + restart: "no" + command: + - bootstrap.sh + - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo + networks: + - dataverse -You might want to stop the containers if you aren't using them. Hit ``Ctrl-c`` (hold down the ``Ctrl`` key and then hit the ``c`` key). +Comment out "dev" and uncomment "demo". -You data is still intact and you can start the containers again with ``docker compose up``. +Uncomment the "volumes" section. -Deleting the Containers -+++++++++++++++++++++++ +Create a directory called "demo" and copy :download:`init.sh <../../../../../modules/container-configbaker/scripts/bootstrap/demo/init.sh>` into it. You are welcome to edit this demo init script, customizing the final message, for example. -If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. +Now run ``docker compose up``. The "bootstrap" container should exit with the message from the init script and Dataverse should be running on http://localhost:8080 as before during the quickstart exercise. -Deleting the Data Directory -+++++++++++++++++++++++++++ +One of the main differences between the "dev" persona and our new "demo" persona is that we are now running the setup-all script without the ``--insecure`` flag. This makes our installation more secure, though it does block "admin" APIs that are useful for configuration. -Data related to the Dataverse containers is placed in a directory called ``data`` next to the ``compose.yml`` file. If you are finished with your demo or evaluation or you want to start fresh, simply delete this directory. +Set DOI Provider to FAKE +++++++++++++++++++++++++ -Configuration -------------- +For the purposes of a demo, we'll use the "FAKE" DOI provider. (For more on this and related settings, see :ref:`pids-configuration` in the Installation Guide.) Without this step, you won't be able to create or publish datasets. -Configuration is described in greater detail under :doc:`/installation/config` in the Installation Guide, but there are some specifics to running in containers you should know about. +Run the following command. (In this context, "dataverse" is the name of the running container.) -.. _configbaker-personas: +``docker exec -it dataverse curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` -Personas -++++++++ +This is an example of configuring a database setting, which you can read more about at :ref:`database-settings` in the Installation Guide. -When the containers are bootstrapped, the "dev" persona is used. In the future we plan to add a "demo" persona that is more suited to demo and evaluation use cases. 
+Smoke Test +---------- -Database Settings -+++++++++++++++++ +At this point, please try some basic operations within your installation, such as: -Updating database settings is the same as described under :ref:`database-settings` in the Installation Guide. +- logging in as dataverseAdmin +- publishing the "root" collection (dataverse) +- creating a collection +- creating a dataset +- uploading a data file +- publishing the dataset -MPCONFIG Options -++++++++++++++++ +About the Containers +-------------------- -The compose file contains an ``environment`` section with various MicroProfile Config (MPCONFIG) options. You can experiment with this by adding ``DATAVERSE_VERSION: foobar`` to change the (displayed) version of Dataverse to "foobar". +Container List +++++++++++++++ -JVM Options -+++++++++++ +If you run ``docker ps``, you'll see that multiple containers are spun up in a demo or evaluation. Here are the most important ones: -JVM options are not especially easy to change in the container. The general process is to get a shell on the "dataverse" container, change the settings, and then stop and start the containers. See :ref:`jvm-options` for more. +- dataverse +- postgres +- solr +- smtp +- bootstrap + +Most are self-explanatory, and correspond to components listed under :doc:`/installation/prerequisites` in the (traditional) Installation Guide, but "bootstrap" refers to :doc:`../configbaker-image`. + +Additional containers are used in development (see :doc:`../dev-usage`), but for the purposes of a demo or evaluation, fewer moving (sometimes pointy) parts are included. + +Tags and Versions ++++++++++++++++++ + +The compose file references a tag called "alpha", which corresponds to the latest released version of Dataverse. This means that if a release of Dataverse comes out while you are demo'ing or evaluating, the version of Dataverse you are using could change if you do a ``docker pull``. We are aware that there is a desire for tags that correspond to versions to ensure consistency. You are welcome to join `the discussion `_ and otherwise get in touch (see :ref:`helping-containers`). For more on tags, see :ref:`supported-image-tags-app`. + +Once Dataverse is running, you can check which version you have through the normal methods: + +- Check the bottom right in a web browser. +- Check http://localhost:8080/api/info/version via API. Troubleshooting --------------- +Hardware and Software Requirements +++++++++++++++++++++++++++++++++++ + +- 8 GB RAM (if not much else is running) +- Mac, Linux, or Windows (experimental) +- Docker + +Windows support is experimental but we are very interested in supporting Windows better. Please report bugs (see :ref:`helping-containers`). + Bootstrapping Did Not Complete ++++++++++++++++++++++++++++++ @@ -126,6 +160,21 @@ In the compose file, try increasing the timeout in the bootstrap container by ad environment: - TIMEOUT=10m +Wrapping Up +----------- + +Deleting the Containers and Data +++++++++++++++++++++++++++++++++ + +If you no longer need the containers because your demo or evaluation is finished and you want to reclaim disk space, run ``docker compose down`` in the directory where you put ``compose.yml``. + +You might also want to delete the ``data`` directory, as described above. + +Giving Feedback +--------------- + +Your feedback is extremely valuable to us! To let us know what you think, please see :ref:`helping-containers`. 
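When reporting a problem or asking for help, the bootstrap container's log output and the running version are usually the most useful details to include. A short sketch for gathering both, assuming the container names used in the compose file above:

.. code-block:: bash

    # Capture the bootstrap container's output to see how far setup got
    docker logs bootstrap > bootstrap.log

    # Record which Dataverse version is actually running
    curl http://localhost:8080/api/info/version
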
+ Getting Help ------------ diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 3817921f10a..a262f43006a 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -9,6 +9,7 @@ services: restart: on-failure user: payara environment: + _CT_DATAVERSE_SITEURL: "https://demo.example.org" DATAVERSE_DB_HOST: postgres DATAVERSE_DB_PASSWORD: secret DATAVERSE_DB_USER: dataverse @@ -45,6 +46,9 @@ services: command: - bootstrap.sh - dev + #- demo + #volumes: + # - ./demo:/scripts/bootstrap/demo networks: - dataverse diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh new file mode 100644 index 00000000000..0e9be7ffef5 --- /dev/null +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -euo pipefail + +# Set some defaults as documented +DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} +export DATAVERSE_URL + +echo "Running base setup-all.sh..." +"${BOOTSTRAP_DIR}"/base/setup-all.sh -p=admin1 | tee /tmp/setup-all.sh.out + +echo "" +echo "Done, your instance has been configured for demo or eval. Have a nice day!" From bdc2c8e980ac9878ef472f874098e4f25431592b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 31 Jan 2024 10:05:04 -0500 Subject: [PATCH 524/546] #9748 avoid issue with existing tools --- .../edu/harvard/iq/dataverse/api/TestApi.java | 26 +++++++++++++++++++ .../iq/dataverse/api/ExternalToolsIT.java | 15 ++++++----- .../edu/harvard/iq/dataverse/api/UtilIT.java | 15 +++++++++++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index 87be1f14e05..10510013495 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -71,5 +71,31 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que return wr.getResponse(); } } + + @Path("files/{id}/externalTool/{toolId}") + @GET + public Response getExternalToolForFileById(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied, @PathParam("toolId") String toolId) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + try { + DataFile dataFile = findDataFileOrDie(idSupplied); + List datasetTools = externalToolService.findFileToolsByTypeAndContentType(type, dataFile.getContentType()); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (externalToolService.meetsRequirements(tool, dataFile) && tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + return error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 2c96ce96dea..9a280f475a1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -101,7 +101,7 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")); - long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getTool = UtilIT.getExternalTool(toolId); getTool.prettyPrint(); @@ -115,14 +115,17 @@ public void testFileLevelTool1() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", CoreMatchers.equalTo("Type must be one of these values: [explore, configure, preview, query].")); - Response getExternalToolsForTabularFiles = UtilIT.getExternalToolsForFile(tabularFileId.toString(), "explore", apiToken); + // Getting tool by tool Id to avoid issue where there are existing tools + String toolIdString = toolId.toString(); + Response getExternalToolsForTabularFiles = UtilIT.getExternalToolForFileById(tabularFileId.toString(), "explore", apiToken, toolIdString); getExternalToolsForTabularFiles.prettyPrint(); + getExternalToolsForTabularFiles.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data[0].displayName", CoreMatchers.equalTo("AwesomeTool")) - .body("data[0].scope", CoreMatchers.equalTo("file")) - .body("data[0].contentType", CoreMatchers.equalTo("text/tab-separated-values")) - .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); + .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")) + .body("data.scope", CoreMatchers.equalTo("file")) + .body("data.contentType", CoreMatchers.equalTo("text/tab-separated-values")) + .body("data.toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); Response getExternalToolsForJuptyerNotebooks = UtilIT.getExternalToolsForFile(jupyterNotebookFileId.toString(), "explore", apiToken); getExternalToolsForJuptyerNotebooks.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 6af3f8a0a09..ec41248a65f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -2354,6 +2354,21 @@ static Response getExternalToolsForFile(String idOrPersistentIdOfFile, String ty } return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTools?type=" + type + optionalQueryParam); } + + static Response getExternalToolForFileById(String idOrPersistentIdOfFile, String type, String apiToken, String toolId) { + String idInPath = idOrPersistentIdOfFile; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
+ if (!NumberUtils.isCreatable(idOrPersistentIdOfFile)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfFile; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTool/" + toolId + "?type=" + type + optionalQueryParam); + } static Response submitFeedback(JsonObjectBuilder job) { return given() From 7d537aa394c447562820cf0343fd6ec2d8a760ca Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 31 Jan 2024 17:45:01 -0500 Subject: [PATCH 525/546] simplified/reorganized the new dev. checklist for making a core field multiple #9634 --- .../source/admin/metadatacustomization.rst | 19 +++++++++++++- doc/sphinx-guides/source/developers/index.rst | 1 - .../source/developers/metadatablocksdev.rst | 26 ------------------- 3 files changed, 18 insertions(+), 28 deletions(-) delete mode 100644 doc/sphinx-guides/source/developers/metadatablocksdev.rst diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 36956567a7d..f97b222b51f 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -651,7 +651,24 @@ The thinking is that the tips can become issues and the issues can eventually be Development Tasks Specific to Changing Fields in Core Metadata Blocks --------------------------------------------------------------------- -When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation and Social Science blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields (a real life example is making a single value-only field support multiple values), code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. An example of a checklist of such tasks is provided in the Development Guide, please see the :doc:`/developers/metadatablocksdev` section. +When it comes to the fields from the core blocks that are distributed with Dataverse (such as Citation, Social Science and Geospatial blocks), code dependencies may exist in Dataverse, primarily in the Import and Export subsystems, on these fields being configured a certain way. So, if it becomes necessary to modify one of such core fields, code changes may be necessary to accompany the change in the block tsv, plus some sample and test files maintained in the Dataverse source tree will need to be adjusted accordingly. + +Making a Field Multi-Valued +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As a recent real life example, a few fields from the Citation and Social Science block were changed to support multiple values, in order to accommodate specific needs of some community member institutions. A PR for one of these fields, ``alternativeTitle`` from the Citation block is linked below. Each time a number of code changes, plus some changes in the sample metadata files in the Dataverse code tree had to be made. The checklist below is to help another developer in the event that a similar change becomes necessary in the future. 
Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. + +- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block. +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Modify the DDI export utility (``DdiExportUtil.java``). +- Modify the OpenAire export utility (``OpenAireExportUtil.java``). +- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). +- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. +- Make sure all the automated Unit and Integration tests are passing. +- Write a short release note to announce the change in the upcoming release. +- Make a Pull Request. + Footnotes --------- diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 25007baf589..25fea138736 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -31,7 +31,6 @@ Developer Guide making-releases making-library-releases metadataexport - metadatablocksdev tools unf/index make-data-count diff --git a/doc/sphinx-guides/source/developers/metadatablocksdev.rst b/doc/sphinx-guides/source/developers/metadatablocksdev.rst deleted file mode 100644 index 17093471467..00000000000 --- a/doc/sphinx-guides/source/developers/metadatablocksdev.rst +++ /dev/null @@ -1,26 +0,0 @@ -=========================== -Metadata Blocks Development -=========================== - -.. contents:: |toctitle| - :local: - -Introduction ------------- - -The idea behind Metadata Blocks in Dataverse is to have everything about the supported metadata fields configurable and customizable. Ideally, this should be accomplished by simply re-importing the updated tsv for the block via the API. In practice, when it comes to the core blocks that are distributed with Dataverse - such as the Citation and Social Science blocks - unfortunately, many dependencies exist in various parts of Dataverse, primarily import and export subsystems, on many specific fields being configured a certain way. This means that code changes may be required whenever a field from one of these core blocks is modified. - -Making a Field Multiple ------------------------ - -Back in 2023, in order to accommodate specific needs of some community member institutions a few fields from Citation and Social Science were changed to support multiple values. 
(For example, the ``alternativeTitle`` field from the Citation block.) A number of code changes had to be made to accommodate this, plus a number of changes in the sample metadata files that are maintained in the Dataverse code tree. The checklist below is to help another developer should a similar change become necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. - -- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block (obviously). -- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). -- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) -- Modify the DDI export utility (``DdiExportUtil.java``). -- Modify the OpenAire export utility (``OpenAireExportUtil.java``). -- Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). -- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated Unit and Integration tests are passing. -- Write a short release note to announce the change in the upcoming release. From ad12c7f2ddaf4f6fb1ec5023845d98092df0da47 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 12:28:06 -0500 Subject: [PATCH 526/546] Apply suggestions from code review --- .../source/admin/metadatacustomization.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index f97b222b51f..841dfd8b3cd 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -658,16 +658,16 @@ Making a Field Multi-Valued As a recent real life example, a few fields from the Citation and Social Science block were changed to support multiple values, in order to accommodate specific needs of some community member institutions. A PR for one of these fields, ``alternativeTitle`` from the Citation block is linked below. Each time a number of code changes, plus some changes in the sample metadata files in the Dataverse code tree had to be made. The checklist below is to help another developer in the event that a similar change becomes necessary in the future. Note that some of the steps below may not apply 1:1 to a different metadata field, depending on how it is exported and imported in various formats by Dataverse. 
It may help to consult the PR `#9440 `_ as a specific example of the changes that had to be made for the ``alternativeTitle`` field. -- Change the value from ``FALSE`` to ``TRUE`` in the ``alowmultiples`` column of the .tsv file for the block. -- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/9.3.0/schema.xml`` as of writing this). -- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (you may be able to use the change in the PR above as a model.) +- Change the value from ``FALSE`` to ``TRUE`` in the ``allowmultiples`` column of the .tsv file for the block. +- Change the value of the ``multiValued`` attribute for the search field in the Solr schema (``conf/solr/x.x.x/schema.xml``). +- Modify the DDI import code (``ImportDDIServiceBean.java``) to support multiple values. (You may be able to use the change in the PR above as a model.) - Modify the DDI export utility (``DdiExportUtil.java``). - Modify the OpenAire export utility (``OpenAireExportUtil.java``). - Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). - Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated Unit and Integration tests are passing. +- Make sure all the automated unit and integration tests are passing. - Write a short release note to announce the change in the upcoming release. -- Make a Pull Request. +- Make a pull request. Footnotes From e064313c4c11fbec2bf875d0f8dbe98b99013fca Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 12:31:01 -0500 Subject: [PATCH 527/546] add refs to dev guide #9634 --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 841dfd8b3cd..5bd28bfa103 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -665,8 +665,8 @@ As a recent real life example, a few fields from the Citation and Social Science - Modify the OpenAire export utility (``OpenAireExportUtil.java``). - Modify the following JSON source files in the Dataverse code tree to actually include multiple values for the field (two should be quite enough!): ``scripts/api/data/dataset-create-new-all-default-fields.json``, ``src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt``, ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json``. (These are used as examples for populating datasets via the import API and by the automated import and export code tests). 
- Similarly modify the following XML files that are used by the DDI export code tests: ``src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.xml`` and ``src/test/java/edu/harvard/iq/dataverse/export/ddi/exportfull.xml``. -- Make sure all the automated unit and integration tests are passing. -- Write a short release note to announce the change in the upcoming release. +- Make sure all the automated unit and integration tests are passing. See :doc:`/developers/testing` in the Developer Guide. +- Write a short release note to announce the change in the upcoming release. See :ref:`writing-release-note-snippets` in the Developer Guide. - Make a pull request. From 89739bc39542930546c807c2236033b7da790688 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 1 Feb 2024 16:37:58 -0500 Subject: [PATCH 528/546] use --insecure and secure later #10238 Using --insecure at first and then doing securing APIs, etc later (like non --insecure does) seems like the best option for now. It allows us to simplify the tutorial and set up an unblock key for later use. --- .../source/container/running/demo.rst | 96 +++++++++++++------ .../scripts/bootstrap/demo/init.sh | 30 +++++- 2 files changed, 94 insertions(+), 32 deletions(-) diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index 4e2a9db3f48..24027e677a1 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -36,27 +36,18 @@ Again, data related to your Dataverse installation such as the database is store You may reach a point during your demo or evaluation that you'd like to start over with a fresh database. Simply make sure the containers are not running and then remove the ``data`` directory. Now, as before, you can run ``docker compose up`` to spin up the containers. -Configuring Dataverse +Setting Up for a Demo --------------------- -Now that you are familiar with the basics of running Dataverse in containers, let's move on to configuration. +Now that you are familiar with the basics of running Dataverse in containers, let's move on to a better setup for a demo or evaluation. -Start Fresh -+++++++++++ - -For this configuration exercise, please start fresh by stopping all containers and removing the ``data`` directory. - -Change the Site URL -+++++++++++++++++++ - -Edit ``compose.yml`` and change ``_CT_DATAVERSE_SITEURL`` to the URL you plan to use for your installation. - -(You can read more about this setting at :ref:`dataverse.siteUrl`.) +Starting Fresh +++++++++++++++ -This is an example of setting an environment variable to configure Dataverse. +For this exercise, please start fresh by stopping all containers and removing the ``data`` directory. -Create and Run a Demo Persona -+++++++++++++++++++++++++++++ +Creating and Running a Demo Persona ++++++++++++++++++++++++++++++++++++ Previously we used the "dev" persona to bootstrap Dataverse, but for security reasons, we should create a persona more suited to demos and evaluations. @@ -83,36 +74,81 @@ Uncomment the "volumes" section. Create a directory called "demo" and copy :download:`init.sh <../../../../../modules/container-configbaker/scripts/bootstrap/demo/init.sh>` into it. You are welcome to edit this demo init script, customizing the final message, for example. +Note that the init script contains a key for using the admin API once it is blocked. You should change it in the script from "unblockme" to something only you know. 
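One way to do that, sketched here on the assumption that the copied script lives at ``demo/init.sh`` as described above, is a quick in-place substitution (GNU ``sed``; on macOS, use ``sed -i ''`` instead):

.. code-block:: bash

    # Replace the default unblock key with one only you know
    # ("s3cret-demo-key" is just a placeholder value)
    sed -i 's/unblockme/s3cret-demo-key/' demo/init.sh
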
+ Now run ``docker compose up``. The "bootstrap" container should exit with the message from the init script and Dataverse should be running on http://localhost:8080 as before during the quickstart exercise. One of the main differences between the "dev" persona and our new "demo" persona is that we are now running the setup-all script without the ``--insecure`` flag. This makes our installation more secure, though it does block "admin" APIs that are useful for configuration. -Set DOI Provider to FAKE -++++++++++++++++++++++++ +Smoke Testing +------------- + +At this point, please try the following basic operations within your installation: + +- logging in as dataverseAdmin (password "admin1") +- publishing the "root" collection (dataverse) +- creating a collection +- creating a dataset +- uploading a data file +- publishing the dataset + +If anything isn't working, please see the sections below on troubleshooting, giving feedback, and getting help. + +Further Configuration +--------------------- + +Now that we've verified through a smoke test that basic operations are working, let's configure our installation of Dataverse. + +Please refer to the :doc:`/installation/config` section of the Installation Guide for various configuration options. -For the purposes of a demo, we'll use the "FAKE" DOI provider. (For more on this and related settings, see :ref:`pids-configuration` in the Installation Guide.) Without this step, you won't be able to create or publish datasets. +Below we'll explain some specifics for configuration in containers. -Run the following command. (In this context, "dataverse" is the name of the running container.) +JVM Options/MicroProfile Config ++++++++++++++++++++++++++++++++ -``docker exec -it dataverse curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE`` +:ref:`jvm-options` can be configured under ``JVM_ARGS`` in the ``compose.yml`` file. Here's an example: + +.. code-block:: bash -This is an example of configuring a database setting, which you can read more about at :ref:`database-settings` in the Installation Guide. + environment: + JVM_ARGS: -Ddataverse.files.storage-driver-id=file1 -Smoke Test +Some JVM options can be configured as environment variables. For example, you can configure the database host like this: + +.. code-block:: bash + + environment: + DATAVERSE_DB_HOST: postgres + +We are in the process of making more JVM options configurable as environment variables. Look for the term "MicroProfile Config" in under :doc:`/installation/config` in the Installation Guide to know if you can use them this way. + +Please note that for a few environment variables (the ones that start with ``%ct`` in :download:`microprofile-config.properties <../../../../../src/main/resources/META-INF/microprofile-config.properties>`), you have to prepend ``_CT_`` to make, for example, ``_CT_DATAVERSE_SITEURL``. We are working on a fix for this in https://github.com/IQSS/dataverse/issues/10285. + +There is a final way to configure JVM options that we plan to deprecate once all JVM options have been converted to MicroProfile Config. Look for "magic trick" under "tunables" at :doc:`../app-image` for more information. + +Database Settings ++++++++++++++++++ + +Generally, you should be able to look at the list of :ref:`database-settings` and configure them but the "demo" persona above secured your installation to the point that you'll need an "unblock key" to access the "admin" API and change database settings. 
+ +In the example below of configuring :ref:`:FooterCopyright` we use the default unblock key of "unblockme" but you should use the key you set above. + +``curl -X PUT -d ", My Org" "http://localhost:8080/api/admin/settings/:FooterCopyright?unblock-key=unblockme"`` + +One you make this change it should be visible in the copyright in the bottom left of every page. + +Next Steps ---------- -At this point, please try some basic operations within your installation, such as: +From here, you are encouraged to continue poking around, configuring, and testing. You probably spend a lot of time reading the :doc:`/installation/config` section of the Installation Guide. -- logging in as dataverseAdmin -- publishing the "root" collection (dataverse) -- creating a collection -- creating a dataset -- uploading a data file -- publishing the dataset +Please consider giving feedback using the methods described below. Good luck with your demo! About the Containers -------------------- +Now that you've gone through the tutorial, you might be interested in the various containers you've spun up and what they do. + Container List ++++++++++++++ diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh index 0e9be7ffef5..e8d1d07dd2d 100644 --- a/modules/container-configbaker/scripts/bootstrap/demo/init.sh +++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh @@ -2,12 +2,38 @@ set -euo pipefail -# Set some defaults as documented +# Set some defaults DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"} export DATAVERSE_URL +BLOCKED_API_KEY=${BLOCKED_API_KEY:-"unblockme"} +export BLOCKED_API_KEY + +# --insecure is used so we can configure a few things but +# later in this script we'll apply the changes as if we had +# run the script without --insecure. echo "Running base setup-all.sh..." -"${BOOTSTRAP_DIR}"/base/setup-all.sh -p=admin1 | tee /tmp/setup-all.sh.out +"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out + +echo "" +echo "Setting DOI provider to \"FAKE\"..." +curl -sS -X PUT -d FAKE "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" + +echo "" +echo "Revoke the key that allows for creation of builtin users..." +curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" + +echo "" +echo "Set key for accessing blocked API endpoints..." +curl -sS -X PUT -d "$BLOCKED_API_KEY" "${DATAVERSE_URL}/api/admin/settings/:BlockedApiKey" + +echo "" +echo "Set policy to only allow access to admin APIs with with a key..." +curl -sS -X PUT -d unblock-key "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" + +echo "" +echo "Block admin and other sensitive API endpoints..." +curl -sS -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints" echo "" echo "Done, your instance has been configured for demo or eval. Have a nice day!" From c8f71f16d41c83586bd4572fd2e4bcf9f8b3962b Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:15:17 -0500 Subject: [PATCH 529/546] Update metadatacustomization.rst The /tree seems to be just a reference for the GitHub URL but the project doesn't have a "tree" directory so probably would be better or less confusing to reference the root of the project. Also the property files are in a different location than the one specified on the Documentation. 
--- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 5bd28bfa103..c9cb3c47f85 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,8 +37,8 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/tree/develop/scripts/api/data/metadatablocks -`__ and the corresponding ResourceBundle property files `/tree/develop/src/main/java `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets +The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks +`__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. About the metadata block TSV From 2978080e5299d91d340ff926ec2a3a33a81b40df Mon Sep 17 00:00:00 2001 From: qqmyers Date: Fri, 2 Feb 2024 16:50:20 -0500 Subject: [PATCH 530/546] Update metadatacustomization.rst --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index c9cb3c47f85..78eadd9b2ce 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,8 +37,8 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks -`__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets +The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks +`__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. 
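Both locations can be browsed directly in a checkout of the Dataverse source tree; a small sketch, using the citation block as the example:

.. code-block:: bash

    # Shipped metadata block TSVs and their ResourceBundle property files
    ls scripts/api/data/metadatablocks/
    ls src/main/java/propertyFiles/

    # For example, inspect how a citation field is defined
    grep -i "alternativeTitle" scripts/api/data/metadatablocks/citation.tsv
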
About the metadata block TSV From 24daf553ecdbc7811737da58d6a41b6294a98434 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva <142103991+jp-tosca@users.noreply.github.com> Date: Fri, 2 Feb 2024 16:53:24 -0500 Subject: [PATCH 531/546] Update metadatacustomization.rst As @qqmyers pointed these are not on /src --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index c9cb3c47f85..4920859d716 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -37,7 +37,7 @@ tab-separated value (TSV). [1]_\ :sup:`,`\ [2]_ While it is technically possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. -The metadata block TSVs shipped with the Dataverse Software are in `/src/scripts/api/data/metadatablocks +The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks `__ and the corresponding ResourceBundle property files `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. From 7c248239c260e56c2c7e162b0ddfafda1af7d9f6 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 2 Feb 2024 19:12:59 -0500 Subject: [PATCH 532/546] Fix line break --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 7d6e0c4c5c1..f518c7eb802 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -40,7 +40,6 @@ good organizational practice to define only one in each file. The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks `__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets - document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. About the metadata block TSV From 59f1560daa77404c602029e2112546b00f9f19f2 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Fri, 2 Feb 2024 19:16:02 -0500 Subject: [PATCH 533/546] Fix incorrect line break that cause build fail --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index f518c7eb802..78eadd9b2ce 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -38,7 +38,6 @@ possible to define more than one metadata block in a TSV file, it is good organizational practice to define only one in each file. 
The metadata block TSVs shipped with the Dataverse Software are in `/scripts/api/data/metadatablocks - `__ with the corresponding ResourceBundle property files in `/src/main/java/propertyFiles `__ of the Dataverse Software GitHub repo. Human-readable copies are available in `this Google Sheets document `__ but they tend to get out of sync with the TSV files, which should be considered authoritative. The Dataverse Software installation process operates on the TSVs, not the Google spreadsheet. From 77951683a2f495e04098125a81945dc076d80b4b Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:33:46 -0500 Subject: [PATCH 534/546] added tabs --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index a20ab864d2a..05263498977 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." 
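Each tool in this TSV is enabled by registering its manifest with the external tools admin API. A rough sketch, assuming a local installation at localhost:8080 and a manifest saved as turbocurator.json (a hypothetical file name):

    # register an external tool by POSTing its JSON manifest to the admin API
    curl -X POST -H "Content-type: application/json" \
         --upload-file turbocurator.json \
         http://localhost:8080/api/admin/externalTools

The list of currently registered tools can then be checked with a GET on the same /api/admin/externalTools endpoint.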
From 905c8cf906857feb2e7231f31c1a2e224b33d26b Mon Sep 17 00:00:00 2001 From: raravumich <48064835+raravumich@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:36:27 -0500 Subject: [PATCH 535/546] added correct tabs --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 05263498977..10f9a6a6062 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." 
From 5760c259ae493ce3670eefcd850480e5106133ef Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 5 Feb 2024 15:11:55 -0500 Subject: [PATCH 536/546] fix formatting #10279 --- .../source/_static/admin/dataverse-external-tools.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 10f9a6a6062..c22392a7c5e 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -5,4 +5,4 @@ Binder explore dataset Binder allows you to spin up custom computing environment File Previewers explore file "A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, Markdown, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers" Data Curation Tool configure file "A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions." Ask the Data query file Ask the Data is an experimental tool that allows you ask natural language questions about the data contained in Dataverse tables (tabular data). See the README.md file at https://github.com/IQSS/askdataverse/tree/main/askthedata for the instructions on adding Ask the Data to your Dataverse installation. -TurboCurator by ICPSR configure dataset "TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI’s ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation." +TurboCurator by ICPSR configure dataset TurboCurator generates metadata improvements for title, description, and keywords. It relies on open AI's ChatGPT & ICPSR best practices. See the `TurboCurator Dataverse Administrator `_ page for more details on how it works and adding TurboCurator to your Dataverse installation. 
From a92560059ecd18a081a063a08f4c5a998fb1e3d4 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Mon, 5 Feb 2024 19:33:33 -0500 Subject: [PATCH 537/546] Fix to provide latest version metadata --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ea74368d110..e3505cbbb33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -778,7 +778,7 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @ @Path("{id}/metadata") @Produces("application/ld+json, application/json-ld") public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return getVersionJsonLDMetadata(crc, id, DS_VERSION_DRAFT, uriInfo, headers); + return getVersionJsonLDMetadata(crc, id, DS_VERSION_LATEST, uriInfo, headers); } @PUT diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 125753296a2..cd292a40a1e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1202,7 +1202,7 @@ public void testGeospatialSearch() { .add("value", "42.33661") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "southLongitude") + .add("typeName", "southLongitud e") ) .add("eastLongitude", Json.createObjectBuilder() From ae9b74fd4592103e1c8135655d312bb7ef0c24d7 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Feb 2024 09:27:09 -0500 Subject: [PATCH 538/546] #10229 fix popup list --- src/main/java/edu/harvard/iq/dataverse/DataversePage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 943a74327d5..3dbc22902b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -362,7 +362,7 @@ public void initFeaturedDataverses() { List featuredSource = new ArrayList<>(); List featuredTarget = new ArrayList<>(); featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId())); - featuredSource.addAll(linkingService.findLinkingDataverses(dataverse.getId())); + featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId())); List featuredList = featuredDataverseService.findByDataverseId(dataverse.getId()); for (DataverseFeaturedDataverse dfd : featuredList) { Dataverse fd = dfd.getFeaturedDataverse(); From 4309ab06308f1be2333dcf40bc0bda3c11022437 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 6 Feb 2024 09:34:01 -0500 Subject: [PATCH 539/546] #10229 add to error message --- src/main/java/propertyFiles/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 157f2ecaf54..f1c8381816c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -875,7 +875,7 @@ dataverse.option.deleteDataverse=Delete Dataverse dataverse.publish.btn=Publish 
dataverse.publish.header=Publish Dataverse dataverse.nopublished=No Published Dataverses -dataverse.nopublished.tip=In order to use this feature you must have at least one published dataverse. +dataverse.nopublished.tip=In order to use this feature you must have at least one published or linked dataverse. dataverse.contact=Email Dataverse Contact dataverse.link=Link Dataverse dataverse.link.btn.tip=Link to Your Dataverse From 2f7ce01fd67539a9213d87884dc229e689a055da Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:38:44 -0500 Subject: [PATCH 540/546] Add to DatasetsIT testSemanticMetadataAPIs test cases for published and draft --- .../harvard/iq/dataverse/api/DatasetsIT.java | 60 +++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 6e6855306e4..e1c4b901116 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -3013,6 +3013,46 @@ public void testSemanticMetadataAPIs() { response = UtilIT.updateDatasetJsonLDMetadata(datasetId, apiToken, badTerms, false); response.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + //We publish the dataset and dataverse + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + //We check the version is published + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + String publishedVersion = jsonLDObject.getString("http://schema.org/version"); + assertNotEquals("DRAFT", publishedVersion); + + // Upload a file so a draft version is created + String pathToFile = "src/main/webapp/resources/images/cc0.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadResponse.prettyPrint(); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + int fileID = uploadResponse.jsonPath().getInt("data.files[0].dataFile.id"); + + //We check the authenticated user gets DRAFT + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + assertEquals("DRAFT", jsonLDObject.getString("http://schema.org/version")); + + // Create user with no permission and check they get published version + String apiTokenNoPerms = UtilIT.createRandomUserGetToken(); + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiTokenNoPerms); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + assertNotEquals("DRAFT", jsonLDObject.getString("http://schema.org/version")); + + // Delete the file + Response deleteFileResponse = UtilIT.deleteFileInDataset(fileID, apiToken); + deleteFileResponse.prettyPrint(); + deleteFileResponse.then().assertThat().statusCode(OK.getStatusCode()); + // Delete the terms of use response = UtilIT.deleteDatasetJsonLDMetadata(datasetId, apiToken, "{\"https://dataverse.org/schema/core#termsOfUse\": \"New terms\"}"); @@ -3026,15 +3066,27 @@ 
public void testSemanticMetadataAPIs() { jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); assertTrue(!jsonLDObject.containsKey("https://dataverse.org/schema/core#termsOfUse")); - // Cleanup - delete dataset, dataverse, user... - Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); - deleteDatasetResponse.prettyPrint(); - assertEquals(200, deleteDatasetResponse.getStatusCode()); + //Delete the DRAFT dataset + Response deleteDraftResponse = UtilIT.deleteDatasetVersionViaNativeApi(datasetId, DS_VERSION_DRAFT, apiToken); + deleteDraftResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + + //We set the user as superuser so we can delete the published dataset + Response superUserResponse = UtilIT.makeSuperUser(username); + superUserResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + + //Delete the published dataset + Response deletePublishedResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); + deletePublishedResponse.prettyPrint(); + deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode()); + //Delete the dataverse Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteDataverseResponse.prettyPrint(); assertEquals(200, deleteDataverseResponse.getStatusCode()); + //Delete the user Response deleteUserResponse = UtilIT.deleteUser(username); deleteUserResponse.prettyPrint(); assertEquals(200, deleteUserResponse.getStatusCode()); From 9568c20359234bbe87b17656c91926ab11329a57 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:53:24 -0500 Subject: [PATCH 541/546] Add release notes --- doc/release-notes/10297-metadata-api-fix.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10297-metadata-api-fix.md diff --git a/doc/release-notes/10297-metadata-api-fix.md b/doc/release-notes/10297-metadata-api-fix.md new file mode 100644 index 00000000000..11ee086af04 --- /dev/null +++ b/doc/release-notes/10297-metadata-api-fix.md @@ -0,0 +1 @@ +The API endpoint `api/datasets/{id}/metadata` has been changed to default to the latest version of the dataset that the user has access. 
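The change described in this release note means the response from the JSON-LD metadata endpoint now depends on what the caller is allowed to see. A sketch of the two cases, assuming a local installation at localhost:8080, with $API_TOKEN, $OTHER_TOKEN, and $ID used as placeholders for real API tokens and a dataset id:

    # a user whose token can see the draft: receives the DRAFT version's JSON-LD metadata
    curl -H "X-Dataverse-key: $API_TOKEN" "http://localhost:8080/api/datasets/$ID/metadata"

    # a user whose token cannot see the draft: gets the latest published version instead
    curl -H "X-Dataverse-key: $OTHER_TOKEN" "http://localhost:8080/api/datasets/$ID/metadata"

This mirrors what the new DatasetsIT assertions check: a user with access to the draft sees "DRAFT" in http://schema.org/version, while a user without that permission sees the published version number.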
From 2f167cf57def265d719f52a7211ed6648b7e3df8 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 10:56:03 -0500 Subject: [PATCH 542/546] Restore SearchIT --- src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index cd292a40a1e..125753296a2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1202,7 +1202,7 @@ public void testGeospatialSearch() { .add("value", "42.33661") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "southLongitud e") + .add("typeName", "southLongitude") ) .add("eastLongitude", Json.createObjectBuilder() From df4f49a1650070427a710046be32b7c5f6ad5312 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 6 Feb 2024 14:43:38 -0500 Subject: [PATCH 543/546] add release note #10238 --- doc/release-notes/10238-container-demo.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/release-notes/10238-container-demo.md diff --git a/doc/release-notes/10238-container-demo.md b/doc/release-notes/10238-container-demo.md new file mode 100644 index 00000000000..edc4db4b650 --- /dev/null +++ b/doc/release-notes/10238-container-demo.md @@ -0,0 +1 @@ +The Container Guide now containers a tutorial for running Dataverse in containers for demo or evaluation purposes: https://guides.dataverse.org/en/6.2/container From ce4b1e0418b31a9a4db9fa7ab1926f17459a046c Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 14:50:35 -0500 Subject: [PATCH 544/546] Change the workflow section including feedback from @sekmiller --- doc/sphinx-guides/source/qa/overview.md | 27 ++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index 64796357831..a5b613f6516 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -11,11 +11,28 @@ This guide describes the testing process used by QA at IQSS and provides a refer ## Workflow -The basic workflow is as follows. Bugs or feature requests are submitted to GitHub by the community or by team members as [issues](https://github.com/IQSS/dataverse/issues). These issues are prioritized and added to a two-week sprint that is reflected on the GitHub {ref}`kanban-board`. As developers work on these issues, a GitHub branch is produced, code is contributed, and a pull request is made to merge these new changes back into the common {ref}`develop branch ` and ultimately released as part of the product. - -Before a pull request is moved to QA, it must be reviewed by a member of the development team from a coding perspective, and it must pass automated tests. There it is tested manually, exercising the UI (using three common browsers) and any business logic it implements. - -Depending on whether the code modifies existing code or is completely new, a smoke test of core functionality is performed and some basic regression testing of modified or related code is performed. Any documentation provided is used to understand the feature and any assertions made in that documentation are tested. 
Once this passes and any bugs that are found are corrected, and the automated tests are confirmed to be passing, the PR is merged into the develop branch, the PR is closed, and the branch is deleted (if it is local). At this point, the PR moves from the QA column automatically into the Merged column (where it might be discussed at the next standup) and the process repeats with the next PR until it is decided to {doc}`make a release `. +Here is a brief description of our workflow: + +### Issue Submission and Prioritization: +- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues)sues. +- These Issues are prioritized and added to a two-week-long sprint that can be tracked on the {ref}`kanban-board`. + +### Development Process: +- Developers will work on a solution on a separate branch +- Once a developer completes their work, they submit a [Pull Request](https://github.com/IQSS/dataverse/pulls) (PR). +- The PR is reviewed by a developer from the team. +- During the review, the reviewer may suggest coding or documentation changes to the original developer. + +### Quality Assurance (QA) Testing: +- The QA tester performs a smoke test of core functionality and regression testing. +- Documentation is used to understand the feature and validate any assertions made. +- If no documentation is provided in the PR, the tester may refer to the original bug report to determine the desired outcome of the changes. +- Once the branch is assumed to be safe, it is merged into the develop branch. + +### Final Steps: +- The PR and the Issue are closed and assigned the “merged†status. +- It is good practice to delete the branch if it is local. +- The content from the PR becomes part of the codebase for {doc}`future releases `. The complete suggested workflow can be found at {doc}`qa-workflow`. From de3bad6e6ec000f182c9a50e019f155cb0c20fb9 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 6 Feb 2024 14:53:05 -0500 Subject: [PATCH 545/546] Typo correction --- doc/sphinx-guides/source/qa/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/qa/overview.md b/doc/sphinx-guides/source/qa/overview.md index a5b613f6516..60e6a28ee9a 100644 --- a/doc/sphinx-guides/source/qa/overview.md +++ b/doc/sphinx-guides/source/qa/overview.md @@ -14,7 +14,7 @@ This guide describes the testing process used by QA at IQSS and provides a refer Here is a brief description of our workflow: ### Issue Submission and Prioritization: -- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues)sues. +- Members of the community or the development team submit bugs or request features through GitHub as [Issues](https://github.com/IQSS/dataverse/issues). - These Issues are prioritized and added to a two-week-long sprint that can be tracked on the {ref}`kanban-board`. ### Development Process: From bec394519826529c02adedfdd601f04b45f859c2 Mon Sep 17 00:00:00 2001 From: landreev Date: Wed, 7 Feb 2024 11:50:52 -0500 Subject: [PATCH 546/546] 8524 adding mechanism for storing tab. files with variable headers (#10282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * "stored with header" flag #8524 * more changes for the streaming and redirect code. #8524 * disabling dynamically-generated varheader in the remaining storage drivers. 
#8524 * Ingest plugins (work in progress) #8524 * R ingest plugin (#8524) * still some unaddressed @todo:s, but the branch should build and the unit tests should be passing. # 8524 * work-in-progress, on the subsetting code in the download instance writer. #8524 * more work-in-progress changes. removing all the unused code from TabularSubsetGenerator, for clarity etc. #8524 * more bits and pieces #8524 * 2 more ingest plugins. #8542 * Integration tests. #8524 * typo #8524 * documenting the new setting. #8524 * a release note for the pr. also, added the "storage quotas enabled" to the list of settings documented in the config guide while I was at it. #8524 * removed all the unused code from this class (lots of it) for clarity, etc. git history can be consulted if anyone is curious about what we used to do here. #8524 * removing @todo: that's no longer relevant #8524 * (cosmetic) defined the control constants used in the integration test. #8524 --- ...4-storing-tabular-files-with-varheaders.md | 6 + .../source/installation/config.rst | 22 + .../edu/harvard/iq/dataverse/DataTable.java | 18 + .../dataverse/api/DownloadInstanceWriter.java | 78 +- .../harvard/iq/dataverse/api/TestIngest.java | 2 +- .../iq/dataverse/dataaccess/FileAccessIO.java | 3 +- .../dataaccess/GlobusOverlayAccessIO.java | 8 +- .../dataaccess/RemoteOverlayAccessIO.java | 8 +- .../iq/dataverse/dataaccess/S3AccessIO.java | 3 +- .../dataverse/dataaccess/SwiftAccessIO.java | 3 +- .../dataaccess/TabularSubsetGenerator.java | 1150 +---------------- .../dataaccess/TabularSubsetInputStream.java | 114 -- .../export/DDIExportServiceBean.java | 11 + .../dataverse/ingest/IngestServiceBean.java | 64 +- .../tabulardata/TabularDataFileReader.java | 26 +- .../impl/plugins/csv/CSVFileReader.java | 24 +- .../impl/plugins/dta/DTAFileReader.java | 11 +- .../impl/plugins/dta/NewDTAFileReader.java | 19 +- .../impl/plugins/por/PORFileReader.java | 13 +- .../impl/plugins/rdata/RDATAFileReader.java | 4 +- .../impl/plugins/rdata/RTabFileParser.java | 28 +- .../impl/plugins/sav/SAVFileReader.java | 24 +- .../impl/plugins/xlsx/XLSXFileReader.java | 11 +- .../settings/SettingsServiceBean.java | 7 +- .../iq/dataverse/util/SystemConfig.java | 8 + ...24-store-tabular-files-with-varheaders.sql | 1 + .../edu/harvard/iq/dataverse/api/FilesIT.java | 128 ++ .../dataverse/ingest/IngestFrequencyTest.java | 2 +- .../impl/plugins/csv/CSVFileReaderTest.java | 24 +- .../impl/plugins/dta/DTAFileReaderTest.java | 2 +- .../plugins/dta/NewDTAFileReaderTest.java | 14 +- 31 files changed, 501 insertions(+), 1335 deletions(-) create mode 100644 doc/release-notes/8524-storing-tabular-files-with-varheaders.md delete mode 100644 src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java create mode 100644 src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql diff --git a/doc/release-notes/8524-storing-tabular-files-with-varheaders.md b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md new file mode 100644 index 00000000000..f7034c846f6 --- /dev/null +++ b/doc/release-notes/8524-storing-tabular-files-with-varheaders.md @@ -0,0 +1,6 @@ +Tabular Data Ingest can now save the generated archival files with the list of variable names added as the first tab-delimited line. As the most significant effect of this feature, +Access API will be able to take advantage of Direct Download for tab. files saved with these headers on S3 - since they no longer have to be generated and added to the streamed content on the fly. 
+ +This behavior is controlled by the new setting `:StoreIngestedTabularFilesWithVarHeaders`. It is false by default, preserving the legacy behavior. When enabled, Dataverse will be able to handle both the newly ingested files, and any already-existing legacy files stored without these headers transparently to the user. E.g. the access API will continue delivering tab-delimited files **with** this header line, whether it needs to add it dynamically for the legacy files, or reading complete files directly from storage for the ones stored with it. + +An API for converting existing legacy tabular files will be added separately. [this line will need to be changed if we have time to add said API before 6.2 is released]. \ No newline at end of file diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a7d7905ca4a..c233e594fa7 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4151,3 +4151,25 @@ A true/false (default) option determining whether the dataset datafile table dis .. _supported MicroProfile Config API source: https://docs.payara.fish/community/docs/Technical%20Documentation/MicroProfile/Config/Overview.html + +.. _:UseStorageQuotas: + +:UseStorageQuotas ++++++++++++++++++ + +Enables storage use quotas in collections. See the :doc:`/api/native-api` for details. + + +.. _:StoreIngestedTabularFilesWithVarHeaders: + +:StoreIngestedTabularFilesWithVarHeaders +++++++++++++++++++++++++++++++++++++++++ + +With this setting enabled, tabular files produced during Ingest will +be stored with the list of variable names added as the first +tab-delimited line. As the most significant effect of this feature, +Access API will be able to take advantage of Direct Download for +tab. files saved with these headers on S3 - since they no longer have +to be generated and added to the streamed file on the fly. + +The setting is ``false`` by default, preserving the legacy behavior. diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java index a17d8c65138..95f3aed0f40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java @@ -112,6 +112,16 @@ public DataTable() { @Column( nullable = true ) private String originalFileName; + + /** + * The physical tab-delimited file is in storage with the list of variable + * names saved as the 1st line. This means that we do not need to generate + * this line on the fly. (Also means that direct download mechanism can be + * used for this file!) 
+ */ + @Column(nullable = false) + private boolean storedWithVariableHeader = false; + /* * Getter and Setter methods: */ @@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) { this.originalFileName = originalFileName; } + public boolean isStoredWithVariableHeader() { + return storedWithVariableHeader; + } + + public void setStoredWithVariableHeader(boolean storedWithVariableHeader) { + this.storedWithVariableHeader = storedWithVariableHeader; + } + /* * Custom overrides for hashCode(), equals() and toString() methods: */ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index bcb8799ec9e..89b22b76a7d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -22,7 +22,6 @@ import jakarta.ws.rs.ext.Provider; import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.engine.command.Command; @@ -104,8 +103,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] String auxiliaryTag = null; String auxiliaryType = null; String auxiliaryFileName = null; + // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a @@ -159,7 +160,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } else if (dataFile.isTabularData()) { - // Many separate special cases here. + // Many separate special cases here. if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { @@ -180,12 +181,26 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - } else if (!di.getConversionParam().equals("noVarHeader")) { - // This is a subset request - can't do. + } else if (di.getConversionParam().equals("noVarHeader")) { + // This will work just fine, if the tab. file is + // stored without the var. header. Throw "unavailable" + // exception otherwise. + // @todo: should we actually drop support for this "noVarHeader" flag? + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + throw new ServiceUnavailableException(); + } + // ... defaults to redirectSupported = true + } else { + // This must be a subset request then - can't do. + redirectSupported = false; + } + } else { + // "straight" download of the full tab-delimited file. + // can redirect, but only if stored with the variable + // header already added: + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { redirectSupported = false; } - } else { - redirectSupported = false; } } } @@ -247,11 +262,16 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); logger.fine("Issuing redirect to the file location."); + // Yes, this throws an exception. It's not an exception + // as in, "bummer, something went wrong". This is how a + // redirect is produced here! 
throw new RedirectionException(response); } throw new ServiceUnavailableException(); } + // Past this point, this is a locally served/streamed download + if (di.getConversionParam() != null) { // Image Thumbnail and Tabular data conversion: // NOTE: only supported on local files, as of 4.0.2! @@ -285,9 +305,14 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // request any tabular-specific services. if (di.getConversionParam().equals("noVarHeader")) { - logger.fine("tabular data with no var header requested"); - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular data with no var header requested"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } else { + logger.fine("can't serve request for tabular data without varheader, since stored with it"); + throw new ServiceUnavailableException(); + } } else if (di.getConversionParam().equals("format")) { // Conversions, and downloads of "stored originals" are // now supported on all DataFiles for which StorageIO @@ -329,11 +354,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) { logger.fine("adding variable id " + variable.getId() + " to the list."); variablePositionIndex.add(variable.getFileOrder()); - if (subsetVariableHeader == null) { - subsetVariableHeader = variable.getName(); - } else { - subsetVariableHeader = subsetVariableHeader.concat("\t"); - subsetVariableHeader = subsetVariableHeader.concat(variable.getName()); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + subsetVariableHeader = subsetVariableHeader == null + ? variable.getName() + : subsetVariableHeader.concat("\t" + variable.getName()); } } else { logger.warning("variable does not belong to this data file."); @@ -346,7 +370,17 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp"); TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator(); - tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t"); + + long numberOfLines = dataFile.getDataTable().getCaseQuantity(); + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + numberOfLines++; + } + + tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), + tempSubsetFile.getAbsolutePath(), + variablePositionIndex, + numberOfLines, + "\t"); if (tempSubsetFile.exists()) { FileInputStream subsetStream = new FileInputStream(tempSubsetFile); @@ -354,8 +388,11 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize); logger.fine("successfully created subset output stream."); - subsetVariableHeader = subsetVariableHeader.concat("\n"); - subsetStreamIO.setVarHeader(subsetVariableHeader); + + if (subsetVariableHeader != null) { + subsetVariableHeader = subsetVariableHeader.concat("\n"); + subsetStreamIO.setVarHeader(subsetVariableHeader); + } String tabularFileName = storageIO.getFileName(); @@ -380,8 +417,13 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } else { logger.fine("empty list of extra arguments."); } + // end of tab. 
data subset case + } else if (dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular file stored with the var header included, no need to generate it on the fly"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); } - } + } // end of tab. data file case if (storageIO == null) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java index 05ba150df8e..add43ea2091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java @@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); return output; diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index f2a1312a150..26637ec5742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -120,7 +120,8 @@ public void open (DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java index 7a6809cb2ff..733daaf1328 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIO.java @@ -450,8 +450,12 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(retrieveSizeFromMedia()); } // Only applies for the S3 Connector case (where we could have run an ingest) - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 1616bfabf96..bca70259cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -124,8 +124,12 @@ public void open(DataAccessOption... 
options) throws IOException { logger.fine("Setting size"); this.setSize(retrieveSizeFromMedia()); } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 8afc365417e..c2143bd4789 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -225,7 +225,8 @@ public void open(DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java index 105a60ab418..717f46ffd60 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java @@ -142,7 +142,8 @@ public void open(DataAccessOption... 
options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 782f7f3a52d..c369010c8cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -60,305 +60,26 @@ public class TabularSubsetGenerator implements SubsetGenerator { - private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); + private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); - private static int COLUMN_TYPE_STRING = 1; - private static int COLUMN_TYPE_LONG = 2; - private static int COLUMN_TYPE_DOUBLE = 3; - private static int COLUMN_TYPE_FLOAT = 4; - - private static int MAX_COLUMN_BUFFER = 8192; - - private FileChannel fileChannel = null; - - private int varcount; - private int casecount; - private int subsetcount; - - private byte[][] columnEntries = null; - - - private ByteBuffer[] columnByteBuffers; - private int[] columnBufferSizes; - private int[] columnBufferOffsets; - - private long[] columnStartOffsets; - private long[] columnTotalOffsets; - private long[] columnTotalLengths; - - public TabularSubsetGenerator() { - - } - - public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException { - if (!datafile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - setVarCount(datafile.getDataTable().getVarQuantity().intValue()); - setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); - - - - StorageIO dataAccess = datafile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); + //private static int MAX_COLUMN_BUFFER = 8192; - File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); - - fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - - if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { - throw new IOException("Illegal number of variables in the subset request"); - } - - subsetcount = variables.size(); - columnTotalOffsets = new long[subsetcount]; - columnTotalLengths = new long[subsetcount]; - columnByteBuffers = new ByteBuffer[subsetcount]; - - + public TabularSubsetGenerator() { - if (subsetcount == 1) { - if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - 
columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder()); - columnTotalOffsets[0] = 0; - } else { - columnEntries = new byte[subsetcount][]; - - columnBufferSizes = new int[subsetcount]; - columnBufferOffsets = new int[subsetcount]; - columnStartOffsets = new long[subsetcount]; - - int i = 0; - for (DataVariable var : variables) { - if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder()); - columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder()); - if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) { - columnByteBuffers[i].limit((int)columnTotalLengths[i]); - } - fileChannel.position(columnStartOffsets[i]); - columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]); - columnBufferOffsets[i] = 0; - columnTotalOffsets[i] = columnBufferSizes[i]; - i++; - } - } - } - - private int getVarCount() { - return varcount; } - private void setVarCount(int varcount) { - this.varcount = varcount; - } - - private int getCaseCount() { - return casecount; - } - - private void setCaseCount(int casecount) { - this.casecount = casecount; - } - - - /* - * Note that this method operates on the *absolute* column number, i.e. - * the number of the physical column in the tabular file. This is stored - * in DataVariable.FileOrder. - * This "column number" should not be confused with the number of column - * in the subset request; a user can request any number of variable - * columns, in an order that doesn't have to follow the physical order - * of the columns in the file. - */ - private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnOffset; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - } else { - columnOffset = getVarCount() * 8; - } - return columnOffset; - } - - /* - * See the comment for the method above. + /** + * This class used to be much more complex. There were methods for subsetting + * from fixed-width field files; including using the optimized, "90 deg. rotated" + * versions of such files (i.e. you create a *columns-wise* copy of your data + * file in which the columns are stored sequentially, and a table of byte + * offsets of each column. You can then read individual variable columns + * for cheap; at the expense of doubling the storage size of your tabular + * data files. These methods were not used, so they were deleted (in Jan. 2024 + * prior to 6.2. + * Please consult git history if you are interested in looking at that code. 
*/ - private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnLength; - - if (column > 0) { - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnLength = columnEndOffsets[0] - varcount * 8; - } - - return columnLength; - } - - - private void bufferMoreColumnBytes(int column) throws IOException { - if (columnTotalOffsets[column] >= columnTotalLengths[column]) { - throw new IOException("attempt to buffer bytes past the column boundary"); - } - fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); - - columnByteBuffers[column].clear(); - if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { - dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); - columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); - } - columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); - dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); - columnBufferOffsets[column] = 0; - columnTotalOffsets[column] += columnBufferSizes[column]; - } - - public byte[] readColumnEntryBytes(int column) { - return readColumnEntryBytes(column, true); - } - - - public byte[] readColumnEntryBytes(int column, boolean addTabs) { - byte[] leftover = null; - byte[] ret = null; - - if (columnBufferOffsets[column] >= columnBufferSizes[column]) { - try { - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - } catch (IOException ioe) { - return null; - } - } - - int byteindex = columnBufferOffsets[column]; - try { - while (columnByteBuffers[column].array()[byteindex] != '\n') { - byteindex++; - if (byteindex == columnBufferSizes[column]) { - // save the leftover: - if (leftover == null) { - leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); - } else { - byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); - leftover = merged; - merged = null; - } - // read more bytes: - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - byteindex = 0; - } - } - - // presumably, we have found our '\n': - if (leftover == null) { - ret = new byte[byteindex - columnBufferOffsets[column] + 1]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); - } else { - ret = new byte[leftover.length + byteindex + 1]; - System.arraycopy(leftover, 0, ret, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); - } - - } catch (IOException ioe) { - return null; - } - - columnBufferOffsets[column] = (byteindex + 1); - - if (column < columnBufferOffsets.length - 1) { - ret[ret.length - 1] = '\t'; - } - return ret; - } - - public int readSingleColumnSubset(byte[] buffer) throws IOException { - if (columnTotalOffsets[0] == columnTotalLengths[0]) { - return -1; - } - 
- if (columnByteBuffers[0] == null) { - dbgLog.fine("allocating single column subset buffer."); - columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); - } - - int bytesread = fileChannel.read(columnByteBuffers[0]); - dbgLog.fine("single column subset: read "+bytesread+" bytes."); - if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { - bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); - } - System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); - - columnTotalOffsets[0] += bytesread; - columnByteBuffers[0].clear(); - return bytesread > 0 ? bytesread : -1; - } - - - public byte[] readSubsetLineBytes() throws IOException { - byte[] ret = null; - int total = 0; - for (int i = 0; i < subsetcount; i++) { - columnEntries[i] = readColumnEntryBytes(i); - if (columnEntries[i] == null) { - throw new IOException("Failed to read subset line entry"); - } - total += columnEntries[i].length; - } - - ret = new byte[total]; - int offset = 0; - for (int i = 0; i < subsetcount; i++) { - System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); - offset += columnEntries[i].length; - } - dbgLog.fine("line: "+new String(ret)); - return ret; - } - - - public void close() { - if (fileChannel != null) { - try { - fileChannel.close(); - } catch (IOException ioe) { - // don't care. - } - } - } - public void subsetFile(String infile, String outfile, List columns, Long numCases) { subsetFile(infile, outfile, columns, numCases, "\t"); } @@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo * files, OK to use on small files: */ - public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { + public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) { Double[] retVector = new Double[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { + public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) { Float[] retVector = new Float[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. 
*/ - public static Long[] subsetLongVector(InputStream in, int column, int numCases) { + public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) { Long[] retVector = new Long[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static String[] subsetStringVector(InputStream in, int column, int numCases) { + public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) { String[] retVector = new String[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas } - /* - * Straightforward method for subsetting a tab-delimited data file, extracting - * all the columns representing continuous variables and returning them as - * a 2-dimensional array of Doubles; - * Inefficient on large files, OK to use on small ones. - */ - public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { - Double[][] retVector = new Double[columns.size()][numCases]; - try (Scanner scanner = new Scanner(in)) { - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value - } - j++; - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); - } - tailIndex++; - } - - } - return retVector; - - } - - public String[] subsetStringVector(DataFile datafile, int column) throws IOException { - return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { - return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { - return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); - } - - // Float methods are temporary; - // In normal operations we'll be treating all the floating point types as - // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest - // tests. -- L.A. 
- - public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { - return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); - } - - public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); - } - - public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); - } - - public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { - if (!dataFile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - int varcount = dataFile.getDataTable().getVarQuantity().intValue(); - int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); - - if (column >= varcount) { - throw new IOException("Column "+column+" is out of bounds."); - } - - StorageIO dataAccess = dataFile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); - - if (columntype == COLUMN_TYPE_STRING) { - String filename = dataFile.getFileMetadata().getLabel(); - if (filename != null) { - filename = filename.replaceFirst("^_", ""); - Integer fnumvalue = null; - try { - fnumvalue = new Integer(filename); - } catch (Exception ex){ - fnumvalue = null; - } - if (fnumvalue != null) { - //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { - if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { - if (!(fnumvalue.intValue() == 60007 - || fnumvalue.intValue() == 59997 - || fnumvalue.intValue() == 60015 - || fnumvalue.intValue() == 59948 - || fnumvalue.intValue() == 60012 - || fnumvalue.intValue() == 52585 - || fnumvalue.intValue() == 60005 - || fnumvalue.intValue() == 60002 - || fnumvalue.intValue() == 59954 - || fnumvalue.intValue() == 60008 - || fnumvalue.intValue() == 54972 - || fnumvalue.intValue() == 55010 - || fnumvalue.intValue() == 54996 - || fnumvalue.intValue() == 53527 - || fnumvalue.intValue() == 53546 - || fnumvalue.intValue() == 55002 - || fnumvalue.intValue() == 55006 - || fnumvalue.intValue() == 54998 - || fnumvalue.intValue() == 52552 - // SPSS/SAV cases with similar issue - compat mode must be disabled - //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... - || fnumvalue.intValue() == 54618 // another SAV file, with long strings... - || fnumvalue.intValue() == 54619 // [same] - || fnumvalue.intValue() == 57983 - || fnumvalue.intValue() == 58262 - || fnumvalue.intValue() == 58288 - || fnumvalue.intValue() == 58656 - || fnumvalue.intValue() == 59144 - // || fnumvalue.intValue() == 69626 [nope!] 
- )) { - dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); - } - } - } - } + private static void skipFirstLine(Scanner scanner) { + if (!scanner.hasNext()) { + throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!"); } - - return subsetObjectVector(tabfile, column, varcount, casecount, columntype); - } - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); - } - - - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { - - Object[] retVector = null; - - boolean isString = false; - boolean isDouble = false; - boolean isLong = false; - boolean isFloat = false; - - //Locale loc = new Locale("en", "US"); - - if (columntype == COLUMN_TYPE_STRING) { - isString = true; - retVector = new String[casecount]; - } else if (columntype == COLUMN_TYPE_DOUBLE) { - isDouble = true; - retVector = new Double[casecount]; - } else if (columntype == COLUMN_TYPE_LONG) { - isLong = true; - retVector = new Long[casecount]; - } else if (columntype == COLUMN_TYPE_FLOAT){ - isFloat = true; - retVector = new Float[casecount]; - } else { - throw new IOException("Unsupported column type: "+columntype); - } - - File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); - long columnOffset = 0; - long columnLength = 0; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnOffset = varcount * 8; - columnLength = columnEndOffsets[0] - varcount * 8; - } - int caseindex = 0; - - try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), - StandardOpenOption.READ))) { - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 
4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount - byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); - token = new String(merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseindex] = null; - } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); - } else { - token = token.substring(0, 128); - // token = String.format(loc, "%.128s", token); - token = token.trim(); - // dbgLog.info("formatted and trimmed: "+token); - } - } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } - } - } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) - } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! 
- if (leftover == null) { - leftover = new byte[(int) bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); - } else { - if (byteoffset != 0) { - throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); - } - byte[] merged = new byte[leftover.length + (int) bytesRead]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); - // leftover = null; - leftover = merged; - merged = null; - } - byteoffset = 0; - - } - } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength - bytesReadTotal)); - } - } - - } - - if (caseindex != casecount) { - throw new IOException("Faile to read "+casecount+" tokens for column "+column); - //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); - } - - return retVector; - } - - private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - long[] byteOffsets = new long[varcount]; - - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { - - byte[] offsetHeader = new byte[varcount * 8]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - // System.out.println(byteOffsets[varindex]); - } - - } - - return byteOffsets; - } - - private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - File rotatedImageFile = new File(rotatedImageFileName); - if (rotatedImageFile.exists()) { - //System.out.println("Image already exists!"); - return rotatedImageFile; - } - - return generateRotatedImage(tabfile, varcount, casecount); - - } - - private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { - // TODO: throw exceptions if bad file, zero varcount, etc. ... - - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - - int MAX_OUTPUT_STREAMS = 32; - int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? - int MAX_COLUMN_BUFFER = 8 * 1024; - - // offsetHeader will contain the byte offsets of the individual column - // vectors in the final rotated image file - byte[] offsetHeader = new byte[varcount * 8]; - int[] bufferedSizes = new int[varcount]; - long[] cachedfileSizes = new long[varcount]; - File[] columnTempFiles = new File[varcount]; - - for (int i = 0; i < varcount; i++) { - bufferedSizes[i] = 0; - cachedfileSizes[i] = 0; - } - - // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is - // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) 
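
For reference only, since this code path is deleted by the patch: the ".90d" rotated image that the removed subsetObjectVector()/extractColumnOffsets() code relied on begins with a header of varcount 8-byte big-endian longs, each holding the end offset of one column vector, followed by the concatenated column bytes. A sketch of decoding that header, with a hypothetical class name:

import java.nio.ByteBuffer;

// Hypothetical sketch of reading the removed ".90d" offset header:
// varcount longs, each the end offset of the corresponding column vector.
class RotatedImageHeaderSketch {
    static long[] columnEndOffsets(byte[] offsetHeader, int varcount) {
        long[] offsets = new long[varcount];
        ByteBuffer buf = ByteBuffer.wrap(offsetHeader); // big-endian by default
        for (int varindex = 0; varindex < varcount; varindex++) {
            offsets[varindex] = buf.getLong();
        }
        return offsets;
    }
}
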
- - byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; - - // read the tab-delimited file: - - try (FileInputStream tabfileStream = new FileInputStream(tabfile); - Scanner scanner = new Scanner(tabfileStream)) { - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - try (BufferedOutputStream outputStream = new BufferedOutputStream( - new FileOutputStream(bufferTempFile, true))) { - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; - } - - } - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; - } - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - } - - // OK, we've created the individual byte vectors of the tab file columns; - // they may be partially saved in temp files and/or in memory. - // We now need to go through all these buffers and create the final - // rotated image file. - - try (BufferedOutputStream finalOut = new BufferedOutputStream( - new FileOutputStream(new File(rotatedImageFileName)))) { - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! 
- - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset += totalColumnBytes; - // totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - } - - // delete the temp file: - cachedTempFile.delete(); - - } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ - cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); - } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - - } - } - - return new File(rotatedImageFileName); - - } - - /* - * Test method for taking a "rotated" image, and reversing it, reassembling - * all the columns in the original order. Which should result in a file - * byte-for-byte identical file to the original tab-delimited version. - * - * (do note that this method is not efficiently implemented; it's only - * being used for experiments so far, to confirm the accuracy of the - * accuracy of generateRotatedImage(). It should not be used for any - * practical means in the application!) 
- */ - private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { - // open the file, read in the offset header: - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; - - - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); - } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ - - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; - } - bytecount++; - } - - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); - } - } - } - - } - - - } - - /** - * main() method, for testing - * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type - * make sure the CLASSPATH contains ... 
- * - */ - - public static void main(String[] args) { - - String tabFileName = args[0]; - int varcount = new Integer(args[1]).intValue(); - int casecount = new Integer(args[2]).intValue(); - int column = new Integer(args[3]).intValue(); - String type = args[4]; - - File tabFile = new File(tabFileName); - File rotatedImageFile = null; - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - - /* - try { - rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - */ - - //System.out.println("\nFinished generating \"rotated\" column image file."); - - //System.out.println("\nOffsets:"); - - MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); - String FORMAT_IEEE754 = "%+#.15e"; - - try { - //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); - //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - if ("string".equals(type)) { - String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - System.out.println(columns[i]); - } - } else { - - Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - if (columns[i] != null) { - BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); - System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); - } else { - System.out.println("NA"); - } - //System.out.println(columns[i]); - } - } - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - } -} - - + scanner.next(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java deleted file mode 100644 index 89e033353c1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -/** - * - * @author Leonid Andreev - */ -public class TabularSubsetInputStream extends InputStream { - private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName()); - - private TabularSubsetGenerator subsetGenerator = null; - private int numberOfSubsetVariables; - private int numberOfObservations; - private int numberOfObservationsRead = 0; - private byte[] leftoverBytes = null; - - public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException { - if (datafile == null) { - throw new IOException("Null datafile in subset request"); - } - if (!datafile.isTabularData()) { - throw new IOException("Subset requested on a non-tabular data file"); - } - numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue(); - - if (variables == null || variables.size() < 1) { - throw new IOException("Null or empty list of variables in subset request."); - } - numberOfSubsetVariables = variables.size(); - subsetGenerator = new TabularSubsetGenerator(datafile, variables); - - } - - //@Override - public int read() throws IOException { - throw new IOException("read() method not implemented; do not use."); - } - - //@Override - public int read(byte[] b) throws IOException { - // TODO: - // Move this code into TabularSubsetGenerator - logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;"); - - if (numberOfSubsetVariables == 1) { - logger.fine("calling the single variable subset read method"); - return subsetGenerator.readSingleColumnSubset(b); - } - - int bytesread = 0; - byte [] linebuffer; - - // do we have a leftover? - if (leftoverBytes != null) { - if (leftoverBytes.length < b.length) { - System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length); - bytesread = leftoverBytes.length; - leftoverBytes = null; - - } else { - // shouldn't really happen... unless it's a very large subset, - // or a very long string, etc. - System.arraycopy(leftoverBytes, 0, b, 0, b.length); - byte[] tmp = new byte[leftoverBytes.length - b.length]; - System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length); - leftoverBytes = tmp; - tmp = null; - return b.length; - } - } - - while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) { - linebuffer = subsetGenerator.readSubsetLineBytes(); - numberOfObservationsRead++; - - if (bytesread + linebuffer.length < b.length) { - // copy linebuffer into the return buffer: - System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length); - bytesread += linebuffer.length; - } else { - System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread); - // save the leftover; - if (bytesread + linebuffer.length > b.length) { - leftoverBytes = new byte[bytesread + linebuffer.length - b.length]; - System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length); - } - return b.length; - } - } - - // and this means we've reached the end of the tab file! - - return bytesread > 0 ? 
bytesread : -1; - } - - //@Override - public void close() { - if (subsetGenerator != null) { - subsetGenerator.close(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java index 5119b4b96c7..edd01ae98a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java @@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set excludedFieldSe List vars = variableService.findByDataTableId(dt.getId()); if (checkField("catgry", excludedFieldSet, includedFieldSet)) { if (checkIsWithoutFrequencies(vars)) { + // @todo: the method called here to calculate frequencies + // when they are missing from the database (for whatever + // reasons) subsets the physical tab-delimited file and + // calculates them in real time. this is very expensive operation + // potentially. let's make sure that, when we do this, we + // save the resulting frequencies in the database, so that + // we don't have to do this again. Also, let's double check + // whether the "checkIsWithoutFrequencies()" method is doing + // the right thing - as it appears to return true when there + // are no categorical variables in the DataTable (?) calculateFrequencies(df, vars); } } @@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List vars) { private void calculateFrequencies(DataFile df, List vars) { + // @todo: see the comment in the part of the code that calls this method try { DataConverter dc = new DataConverter(); File tabFile = dc.downloadFromStorageIO(df.getStorageIO()); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 233f746fb17..9bacafd173f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -726,27 +726,17 @@ public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFil } public void produceContinuousSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { - - /* - // quick, but memory-inefficient way: - // - this method just loads the entire file-worth of continuous vectors - // into a Double[][] matrix. 
- //Double[][] variableVectors = subsetContinuousVectors(dataFile); - //calculateContinuousSummaryStatistics(dataFile, variableVectors); - - // A more sophisticated way: this subsets one column at a time, using - // the new optimized subsetting that does not have to read any extra - // bytes from the file to extract the column: - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - */ for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) { if (dataFile.getDataTable().getDataVariables().get(i).isIntervalContinuous()) { logger.fine("subsetting continuous vector"); if ("float".equals(dataFile.getDataTable().getDataVariables().get(i).getFormat())) { - Float[] variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Float[] variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Float vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -754,7 +744,11 @@ public void produceContinuousSummaryStatistics(DataFile dataFile, File generated calculateUNF(dataFile, i, variableVector); variableVector = null; } else { - Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Double vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -776,7 +770,11 @@ public void produceDiscreteNumericSummaryStatistics(DataFile dataFile, File gene && dataFile.getDataTable().getDataVariables().get(i).isTypeNumeric()) { logger.fine("subsetting discrete-numeric vector"); - Long[] variableVector = TabularSubsetGenerator.subsetLongVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Long[] variableVector = TabularSubsetGenerator.subsetLongVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); // We are discussing calculating the same summary stats for // all numerics (the same kind of sumstats that we've been calculating // for numeric continuous type) -- L.A. Jul. 
2014 @@ -810,7 +808,11 @@ public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedT if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) { logger.fine("subsetting character vector"); - String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + String[] variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); //calculateCharacterSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: logger.fine("Calculating UNF on a String vector"); @@ -828,20 +830,29 @@ public static void produceFrequencyStatistics(DataFile dataFile, File generatedT produceFrequencies(generatedTabularFile, vars); } - public static void produceFrequencies( File generatedTabularFile, List vars) throws IOException { + public static void produceFrequencies(File generatedTabularFile, List vars) throws IOException { for (int i = 0; i < vars.size(); i++) { Collection cats = vars.get(i).getCategories(); int caseQuantity = vars.get(i).getDataTable().getCaseQuantity().intValue(); boolean isNumeric = vars.get(i).isTypeNumeric(); + boolean skipVariableHeaderLine = vars.get(i).getDataTable().isStoredWithVariableHeader(); Object[] variableVector = null; if (cats.size() > 0) { if (isNumeric) { - variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } else { - variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } if (variableVector != null) { Hashtable freq = calculateFrequency(variableVector); @@ -923,6 +934,7 @@ public boolean ingestAsTabular(Long datafile_id) { DataFile dataFile = fileService.find(datafile_id); boolean ingestSuccessful = false; boolean forceTypeCheck = false; + boolean storingWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders(); // Never attempt to ingest a file that's already ingested! 
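
As a rough illustration of what calculateFrequency() is asked to do with the column vectors produced above (the class and method names here are hypothetical, not the project's API): count occurrences of each non-null value in a subsetted column.

import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch of per-category frequency counting over one column
// vector returned by the subsetters; nulls are treated as missing values.
class FrequencySketch {
    static Map<Object, Long> frequencies(Object[] columnVector) {
        Map<Object, Long> freq = new HashMap<>();
        for (Object value : columnVector) {
            if (value != null) {
                freq.merge(value, 1L, Long::sum);
            }
        }
        return freq;
    }
}
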
if (dataFile.isTabularData()) { @@ -1024,11 +1036,7 @@ public boolean ingestAsTabular(Long datafile_id) { TabularDataIngest tabDataIngest = null; try { - if (additionalData != null) { - tabDataIngest = ingestPlugin.read(inputStream, additionalData); - } else { - tabDataIngest = ingestPlugin.read(inputStream, null); - } + tabDataIngest = ingestPlugin.read(inputStream, storingWithVariableHeader, additionalData); } catch (IOException ingestEx) { dataFile.SetIngestProblem(); FileUtil.createIngestFailureReport(dataFile, ingestEx.getMessage()); @@ -1081,6 +1089,7 @@ public boolean ingestAsTabular(Long datafile_id) { dataFile.setDataTable(tabDataIngest.getDataTable()); tabDataIngest.getDataTable().setDataFile(dataFile); tabDataIngest.getDataTable().setOriginalFileName(originalFileName); + dataFile.getDataTable().setStoredWithVariableHeader(storingWithVariableHeader); try { produceSummaryStatistics(dataFile, tabFile); @@ -1172,6 +1181,7 @@ public boolean ingestAsTabular(Long datafile_id) { // Replace contents of the file with the tab-delimited data produced: dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); + // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); @@ -2297,7 +2307,7 @@ public static void main(String[] args) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { System.err.println("Caught an exception trying to ingest file "+file+"."); System.exit(1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java index 223b171dfb5..0f23a3d9781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java @@ -20,10 +20,13 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.*; //import edu.harvard.iq.dataverse.ingest.plugin.metadata.*; import java.io.*; import static java.lang.System.*; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; /** @@ -98,7 +101,7 @@ public void setDataLanguageEncoding(String dataLanguageEncoding) { * * @throws java.io.IOException if a reading error occurs. 
      */
-    public abstract TabularDataIngest read(BufferedInputStream stream, File dataFile)
+    public abstract TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile)
         throws IOException;
 
 
@@ -176,5 +179,26 @@ protected String escapeCharacterString(String rawString) {
 
         return escapedString;
     }
+
+    protected String generateVariableHeader(List<DataVariable> dvs) {
+        String varHeader = null;
+
+        if (dvs != null) {
+            Iterator<DataVariable> iter = dvs.iterator();
+            DataVariable dv;
+
+            if (iter.hasNext()) {
+                dv = iter.next();
+                varHeader = dv.getName();
+            }
+
+            while (iter.hasNext()) {
+                dv = iter.next();
+                varHeader = varHeader + "\t" + dv.getName();
+            }
+        }
+
+        return varHeader;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
index 57f76df3802..f8816ababb4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java
@@ -110,7 +110,7 @@ private void init() throws IOException {
      * @throws java.io.IOException if a reading error occurs.
      */
     @Override
-    public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException {
+    public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException {
         init();
 
         if (stream == null) {
@@ -124,7 +124,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws
         File tabFileDestination = File.createTempFile("data-", ".tab");
         PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath());
 
-        int lineCount = readFile(localBufferedReader, dataTable, tabFileWriter);
+        int lineCount = readFile(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter);
 
         logger.fine("Tab file produced: " + tabFileDestination.getAbsolutePath());
 
@@ -136,14 +136,17 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws
 
     }
 
-    public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException {
+    public int readFile(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter finalOut) throws IOException {
 
         List<DataVariable> variableList = new ArrayList<>();
         CSVParser parser = new CSVParser(csvReader, inFormat.withHeader());
         Map<String, Integer> headers = parser.getHeaderMap();
 
         int i = 0;
+        String variableNameHeader = null;
+
         for (String varName : headers.keySet()) {
+            // @todo: is .keySet() guaranteed to return the names in the right order?
             if (varName == null || varName.isEmpty()) {
                 // TODO:
                 // Add a sensible variable name validation algorithm.
@@ -158,6 +161,13 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f
             dv.setTypeCharacter();
             dv.setIntervalDiscrete();
+
+            if (saveWithVariableHeader) {
+                variableNameHeader = variableNameHeader == null
+                        ? 
varName + : variableNameHeader.concat("\t" + varName); + } + i++; } @@ -342,6 +352,14 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; + + // Save the variable name header, if requested + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException("failed to generate the Variable Names header"); + } + finalOut.println(variableNameHeader); + } for (CSVRecord record : parser) { if (!record.isConsistent()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 2dec701592e..73818f8fb62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -505,7 +505,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { dbgLog.info("***** DTAFileReader: read() start *****"); if (dataFile != null) { @@ -519,7 +519,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws if (releaseNumber!=104) { decodeExpansionFields(stream); } - decodeData(stream); + decodeData(stream, storeWithVariableHeader); decodeValueLabels(stream); ingesteddata.setDataTable(dataTable); @@ -1665,7 +1665,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx dbgLog.fine("parseValueLabelsRelease108(): end"); } - private void decodeData(BufferedInputStream stream) throws IOException { + private void decodeData(BufferedInputStream stream, boolean saveWithVariableHeader) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); @@ -1719,6 +1719,11 @@ private void decodeData(BufferedInputStream stream) throws IOException { BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; + + // add the variable header here, if needed + if (saveWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 22581834676..53607d541de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -339,7 +339,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { logger.fine("NewDTAFileReader: read() start"); // shit ton of diagnostics (still) needed here!! -- L.A. 
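
The variable-name header line that these readers now optionally emit is simply the variable names joined by tabs, as built by generateVariableHeader() above. An equivalent sketch, with made-up variable names in the usage comment:

import java.util.List;

// Illustrative equivalent of the tab-joined variable-name header line.
class VariableHeaderSketch {
    static String header(List<String> variableNames) {
        if (variableNames == null || variableNames.isEmpty()) {
            return null;
        }
        return String.join("\t", variableNames);
    }
    // e.g. header(List.of("id", "age", "income")) yields "id\tage\tincome"
}
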
@@ -363,7 +363,13 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws // "characteristics" - STATA-proprietary information // (we are skipping it) readCharacteristics(dataReader); - readData(dataReader); + + String variableHeaderLine = null; + + if (storeWithVariableHeader) { + variableHeaderLine = generateVariableHeader(dataTable.getDataVariables()); + } + readData(dataReader, variableHeaderLine); // (potentially) large, (potentially) non-ASCII character strings // saved outside the section, and referenced @@ -707,7 +713,7 @@ private void readCharacteristics(DataReader reader) throws IOException { } - private void readData(DataReader reader) throws IOException { + private void readData(DataReader reader, String variableHeaderLine) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data()); logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); @@ -731,6 +737,11 @@ private void readData(DataReader reader) throws IOException { FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + // add the variable header here, if needed + if (variableHeaderLine != null) { + pwout.println(variableHeaderLine); + } + logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { @@ -999,6 +1010,8 @@ private void readSTRLs(DataReader reader) throws IOException { int nobs = dataTable.getCaseQuantity().intValue(); String[] line; + + //@todo: adjust for the case of storing the file with the variable header for (int obsindex = 0; obsindex < nobs; obsindex++) { if (scanner.hasNext()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index c90b0ea6950..2ee966c3e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -180,7 +180,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File additionalData) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{ dbgLog.fine("PORFileReader: read() start"); if (additionalData != null) { @@ -226,7 +226,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t headerId = "8S"; } - decode(headerId, bfReader); + decode(headerId, bfReader, storeWithVariableHeader); // for last iteration @@ -382,7 +382,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t return ingesteddata; } - private void decode(String headerId, BufferedReader reader) throws IOException{ + private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{ if (headerId.equals("1")) decodeProductName(reader); else if (headerId.equals("2")) decodeLicensee(reader); else if (headerId.equals("3")) decodeFileLabel(reader); @@ -398,7 +398,7 @@ private void decode(String headerId, BufferedReader reader) throws IOException{ else if (headerId.equals("C")) decodeVariableLabel(reader); else if (headerId.equals("D")) decodeValueLabel(reader); else if (headerId.equals("E")) decodeDocument(reader); - else if 
(headerId.equals("F")) decodeData(reader); + else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader); } @@ -1099,7 +1099,7 @@ private void decodeDocument(BufferedReader reader) throws IOException { } - private void decodeData(BufferedReader reader) throws IOException { + private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeData(): start"); // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta int[] variableTypeFinal= new int[varQnty]; @@ -1126,6 +1126,9 @@ private void decodeData(BufferedReader reader) throws IOException { // contents (variable) checker concering decimals Arrays.fill(variableTypeFinal, 0); + if (storeWithVariableHeader) { + pwout.println(StringUtils.join(variableNameList, "\t")); + } // raw-case counter int j = 0; // case diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index eb1353fd792..50f2f89e354 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -473,7 +473,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); @@ -509,7 +509,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); - int lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter); + int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index f60b7733463..fbe7e401b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -61,8 +61,8 @@ public RTabFileParser (char delimiterChar) { // should be used. - public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout) throws IOException { - dbgLog.warning("RTabFileParser: Inside R Tab file parser"); + public int read(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter pwout) throws IOException { + dbgLog.fine("RTabFileParser: Inside R Tab file parser"); int varQnty = 0; @@ -94,14 +94,17 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout boolean[] isTimeVariable = new boolean[varQnty]; boolean[] isBooleanVariable = new boolean[varQnty]; + String variableNameHeader = null; + if (dataTable.getDataVariables() != null) { for (int i = 0; i < varQnty; i++) { DataVariable var = dataTable.getDataVariables().get(i); if (var == null) { - // throw exception! 
+ throw new IOException ("null dataVariable passed to the parser"); + } if (var.getType() == null) { - // throw exception! + throw new IOException ("null dataVariable type passed to the parser"); } if (var.isTypeCharacter()) { isCharacterVariable[i] = true; @@ -128,13 +131,24 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout } } } else { - // throw excepion "unknown variable format type" - ? + throw new IOException ("unknown dataVariable format passed to the parser"); } - + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? var.getName() + : variableNameHeader.concat("\t" + var.getName()); + } } } else { - // throw exception! + throw new IOException ("null dataVariables list passed to the parser"); + } + + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException ("failed to generate the Variable Names header"); + } + pwout.println(variableNameHeader); } while ((line = csvReader.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 682b8f1166c..5eecbdfb666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -338,7 +338,7 @@ private void init() throws IOException { } } - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{ dbgLog.info("SAVFileReader: read() start"); if (dataFile != null) { @@ -422,7 +422,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws methodCurrentlyExecuted = "decodeRecordTypeData"; dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData"); - decodeRecordTypeData(stream); + decodeRecordTypeData(stream, storeWithVariableHeader); } catch (IllegalArgumentException e) { @@ -2308,7 +2308,7 @@ void decodeRecordType999(BufferedInputStream stream) throws IOException { - void decodeRecordTypeData(BufferedInputStream stream) throws IOException { + void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeRecordTypeData(): start"); ///String fileUnfValue = null; @@ -2320,9 +2320,9 @@ void decodeRecordTypeData(BufferedInputStream stream) throws IOException { throw new IllegalArgumentException("stream == null!"); } if (isDataSectionCompressed){ - decodeRecordTypeDataCompressed(stream); + decodeRecordTypeDataCompressed(stream, storeWithVariableHeader); } else { - decodeRecordTypeDataUnCompressed(stream); + decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader); } /* UNF calculation was here... 
*/ @@ -2362,7 +2362,7 @@ PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException { } - void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); @@ -2395,7 +2395,10 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; - + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get( @@ -2947,7 +2950,7 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti } - void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****"); if (stream ==null){ @@ -3013,6 +3016,11 @@ void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOExcep ///dataTable2 = new Object[varQnty][caseQnty]; // storage of date formats to pass to UNF ///dateFormats = new String[varQnty][caseQnty]; + + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } try { for (int i = 0; ; i++){ // case-wise loop diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index ea3f3868f24..ef91793690e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; @@ -81,7 +80,9 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. 
*/ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { + // @todo: implement handling of "saveWithVariableHeader" option + init(); TabularDataIngest ingesteddata = new TabularDataIngest(); @@ -118,6 +119,10 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws String[] caseRow = new String[varQnty]; String[] valueTokens; + // add the variable header here, if needed + if (storeWithVariableHeader) { + finalWriter.println(generateVariableHeader(dataTable.getDataVariables())); + } while ((line = secondPassReader.readLine()) != null) { // chop the line: @@ -549,7 +554,7 @@ public static void main(String[] args) throws Exception { BufferedInputStream xlsxInputStream = new BufferedInputStream(new FileInputStream(new File(args[0]))); - TabularDataIngest dataIngest = testReader.read(xlsxInputStream, null); + TabularDataIngest dataIngest = testReader.read(xlsxInputStream, false, null); dataTable = dataIngest.getDataTable(); diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 627cef08d8b..3b7632f3d9e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -598,7 +598,12 @@ Whether Harvesting (OAI) service is enabled * Allows an instance admin to disable Solr search facets on the collection * and dataset pages instantly */ - DisableSolrFacets + DisableSolrFacets, + /** + * When ingesting tabular data files, store the generated tab-delimited + * files *with* the variable names line up top. + */ + StoreIngestedTabularFilesWithVarHeaders ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 3c6992f8ec3..ded394833f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1173,4 +1173,12 @@ public boolean isStorageQuotasEnforced() { public Long getTestStorageQuotaLimit() { return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.StorageQuotaSizeInBytes); } + /** + * Should we store tab-delimited files produced during ingest *with* the + * variable name header line included? + * @return boolean - defaults to false. 
+ */ + public boolean isStoringIngestedFilesWithHeaders() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders, false); + } } diff --git a/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql new file mode 100644 index 00000000000..7c52a00107a --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql @@ -0,0 +1 @@ +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS storedWithVariableHeader BOOLEAN DEFAULT FALSE; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 915f82a6de2..cfc6f9335b3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -16,6 +16,7 @@ import io.restassured.path.xml.XmlPath; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.File; import java.io.IOException; @@ -33,6 +34,8 @@ import jakarta.json.JsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.*; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import org.junit.jupiter.api.AfterAll; @@ -2483,4 +2486,129 @@ public void testCollectionStorageQuotas() { UtilIT.deleteSetting(SettingsServiceBean.Key.UseStorageQuotas); } + + @Test + public void testIngestWithAndWithoutVariableHeader() throws NoSuchAlgorithmException { + msgt("testIngestWithAndWithoutVariableHeader"); + + // The compact Stata file we'll be using for this test: + // (this file is provided by Stata inc. 
- it's genuine quality) + String pathToFile = "scripts/search/data/tabular/stata13-auto.dta"; + // The pre-calculated MD5 signature of the *complete* tab-delimited + // file as seen by the final Access API user (i.e., with the variable + // header line in it): + String tabularFileMD5 = "f298c2567cc8eb544e36ad83edf6f595"; + // Expected byte sizes of the generated tab-delimited file as stored, + // with and without the header: + int tabularFileSizeWoutHeader = 4026; + int tabularFileSizeWithHeader = 4113; + + String apiToken = createUserGetToken(); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetIdA = createDatasetGetId(dataverseAlias, apiToken); + + // Before we do anything else, make sure that the instance is configured + // the "old" way, i.e., to store ingested files without the headers: + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Response addResponse = UtilIT.uploadFileViaNative(datasetIdA.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdA = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdA); + + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdA.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(A)"); + + // Check the metadata to confirm that the file has ingested: + + Response fileDataResponse = UtilIT.getFileData(fileIdA.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWoutHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum: + + Response fileDownloadResponse = UtilIT.downloadFile(fileIdA.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + byte[] fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + messageDigest.update(fileDownloadBytes); + byte[] rawDigestBytes = messageDigest.digest(); + String tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved without the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // Repeat the whole thing, in another dataset (because we will be uploading + // an identical file), but with the "store with the header setting enabled): + + UtilIT.enableSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Integer datasetIdB = createDatasetGetId(dataverseAlias, apiToken); + + addResponse = UtilIT.uploadFileViaNative(datasetIdB.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdB = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdB); 
+ + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdB.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(B)"); + + // Check the metadata to confirm that the file has ingested: + + fileDataResponse = UtilIT.getFileData(fileIdB.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWithHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum, again + + fileDownloadResponse = UtilIT.downloadFile(fileIdB.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + messageDigest.reset(); + messageDigest.update(fileDownloadBytes); + rawDigestBytes = messageDigest.digest(); + tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved with the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // In other words, whether the file was saved with, or without the header, + // as downloaded by the user, the end result must be the same in both cases! + // In other words, whether that first line with the variable names is already + // in the physical file, or added by Dataverse on the fly, the downloaded + // content must be identical. + + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + // @todo: cleanup? + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java index 96e314324ab..ca64bcc794f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java @@ -99,7 +99,7 @@ private DataFile readFileCalcFreq(String fileName, String type ) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { tabDataIngest = null; System.out.println("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java index fc066ef195e..9afb35918a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java @@ -52,7 +52,7 @@ public void testRead() { try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - File outFile = instance.read(stream, null).getTabDelimitedFile(); + File outFile = instance.read(stream, false, null).getTabDelimitedFile(); result = new BufferedReader(new FileReader(outFile)); logger.fine("Final pass: " + outFile.getPath()); } catch (IOException ex) { @@ -104,7 +104,7 @@ public void testVariables() { try 
(BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - result = instance.read(stream, null).getDataTable(); + result = instance.read(stream, false, null).getDataTable(); } catch (IOException ex) { fail("" + ex); } @@ -154,7 +154,7 @@ public void testSubset() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -195,7 +195,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(floatVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -229,7 +229,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(longVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -256,7 +256,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(stringVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -298,7 +298,7 @@ public void testVariableUNFs() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -327,7 +327,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); } catch (IOException | UnfException ioex) { @@ -345,7 +345,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); @@ -363,7 +363,7 
@@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); String[] dateFormats = null; @@ -401,7 +401,7 @@ public void testVariableUNFs() { public void testBrokenCSV() { String brokenFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/BrokenCSV.csv"; try { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, false, null); fail("IOException not thrown on null csv"); } catch (NullPointerException ex) { String expMessage = null; @@ -412,7 +412,7 @@ public void testBrokenCSV() { } try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(brokenFile))) { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, false, null); fail("IOException was not thrown when collumns do not align."); } catch (IOException ex) { String expMessage = BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java index 113e9be6b54..8af36d6466d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java @@ -16,7 +16,7 @@ public class DTAFileReaderTest { @Test public void testOs() throws IOException { - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("rel_8_or_9", result.getDataTable().getOriginalFormatVersion()); assertEquals(50, result.getDataTable().getDataVariables().size()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java index c963346b05e..0f14054f472 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java @@ -25,7 +25,7 @@ public void testAuto() throws IOException { instance = new NewDTAFileReader(null, 117); // From https://www.stata-press.com/data/r13/auto.dta // `strings` shows "
<stata_dta><header><release>117</release>"
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), false, nullDataFile);
         assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat());
         assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion());
         assertEquals(12, result.getDataTable().getDataVariables().size());
@@ -39,7 +39,7 @@ public void testAuto() throws IOException {
     @Test
     public void testStrl() throws IOException {
         instance = new NewDTAFileReader(null, 118);
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), false, nullDataFile);
         DataTable table = result.getDataTable();
         assertEquals("application/x-stata", table.getOriginalFileFormat());
         assertEquals("STATA 14", table.getOriginalFormatVersion());
@@ -58,7 +58,7 @@ public void testStrl() throws IOException {
     @Test
     public void testDates() throws IOException {
         instance = new NewDTAFileReader(null, 118);
-        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), nullDataFile);
+        TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), false, nullDataFile);
         DataTable table = result.getDataTable();
         assertEquals("application/x-stata", table.getOriginalFileFormat());
         assertEquals("STATA 14", table.getOriginalFormatVersion());
@@ -77,7 +77,7 @@ public void testDates() throws IOException {
     @Test
     void testNull() {
         instance = new NewDTAFileReader(null, 117);
-        assertThrows(IOException.class, () -> instance.read(null, new File("")));
+        assertThrows(IOException.class, () -> instance.read(null, false, new File("")));
     }

     // TODO: Can we create a small file to check into the code base that exercises the value-label names non-zero offset issue?
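
Every read() call in these test hunks now passes the new storeWithVariableHeader boolean explicitly (false). The production call site that decides the real value is not part of this excerpt; purely as an illustration, and with the caveat that everything here other than SystemConfig.isStoringIngestedFilesWithHeaders() and the three-argument read() signature is an assumption rather than code from this patch, the ingest code would plausibly consult the new flag like this:

    import java.io.BufferedInputStream;
    import java.io.File;
    import java.io.IOException;

    // Package locations are assumptions based on the paths visible in this patch.
    import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataFileReader;
    import edu.harvard.iq.dataverse.ingest.tabulardata.TabularDataIngest;
    import edu.harvard.iq.dataverse.util.SystemConfig;

    public class IngestCallSiteSketch {

        // Hypothetical helper, not part of this patch: it only illustrates how the
        // new SystemConfig.isStoringIngestedFilesWithHeaders() flag (added above)
        // could be threaded into the reworked
        // read(stream, storeWithVariableHeader, dataFile) signature that every
        // reader touched by this patch now implements.
        public TabularDataIngest ingestWithConfiguredHeader(TabularDataFileReader ingestPlugin,
                                                            BufferedInputStream stream,
                                                            File additionalData,
                                                            SystemConfig systemConfig) throws IOException {
            boolean storeWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders();
            return ingestPlugin.read(stream, storeWithVariableHeader, additionalData);
        }
    }
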
@@ -87,7 +87,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { instance = new NewDTAFileReader(null, 117); // https://dataverse.harvard.edu/file.xhtml?fileId=2865667 Stata 13 HouseImputingCivilRightsInfo.dta md5=7dd144f27cdb9f8d1c3f4eb9c4744c42 - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(5, result.getDataTable().getDataVariables().size()); @@ -107,7 +107,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { public void testFirstCategoryNonZeroOffset1() throws IOException { instance = new NewDTAFileReader(null, 118); // https://dataverse.harvard.edu/file.xhtml?fileId=3140457 Stata 14: 2018_04_06_Aggregated_dataset_v2.dta - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 14", result.getDataTable().getOriginalFormatVersion()); assertEquals(227, result.getDataTable().getDataVariables().size()); @@ -136,7 +136,7 @@ public void test33k() throws IOException { @Test public void testCharacteristics() throws IOException { instance = new NewDTAFileReader(null, 117); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(441, result.getDataTable().getDataVariables().size());
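
The SAV and XLSX hunks above both call generateVariableHeader(dataTable.getDataVariables()), but that helper's implementation is not part of this excerpt. A minimal sketch, assuming it does no more than tab-join the variable names, which is exactly what the CSV reader hunk builds by hand as variableNameHeader:

    import java.util.List;
    import java.util.stream.Collectors;

    import edu.harvard.iq.dataverse.datavariable.DataVariable;

    public class VariableHeaderSketch {

        // Sketch only: the real generateVariableHeader(...) lives in the shared
        // reader code and may differ in detail. This version mirrors the CSV
        // reader hunk, which concatenates the variable names separated by tabs
        // ("name1\tname2\t...") and returns null for an empty variable list.
        public static String generateVariableHeader(List<DataVariable> dataVariables) {
            if (dataVariables == null || dataVariables.isEmpty()) {
                return null;
            }
            return dataVariables.stream()
                    .map(DataVariable::getName)
                    .collect(Collectors.joining("\t"));
        }
    }

Whatever its exact form, that single header line (plus its trailing newline) is what accounts for the 87-byte difference between tabularFileSizeWithHeader (4113) and tabularFileSizeWoutHeader (4026) asserted in the new FilesIT test, while the MD5 of the downloaded file stays the same in both configurations.
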