diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b2be8f531c4..1430ba951a6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -56,7 +56,7 @@ If you are interested in working on the main Dataverse code, great! Before you s
Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request.
-After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/2 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request).
+After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/34 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request).
Thanks for your contribution!
@@ -64,4 +64,4 @@ Thanks for your contribution!
[Community Call]: https://dataverse.org/community-calls
[dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev
[community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing
-[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405
+[dev efforts]: https://github.com/orgs/IQSS/projects/34/views/6
diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh
new file mode 100755
index 00000000000..fe940d9890d
--- /dev/null
+++ b/conf/localstack/buckets.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack
+awslocal s3 mb s3://mybucket
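A minimal sketch of how the new org.testcontainers "localstack" test dependency (added to pom.xml below) might talk to a bucket like the one this script pre-creates. The LocalStack image tag, the class name, and the use of the AWS SDK v1 client that is already a project dependency are assumptions for illustration, not part of this patch:

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.utility.DockerImageName;

public class LocalStackS3Sketch {
    public static void main(String[] args) {
        // Start LocalStack with only the S3 service enabled (image tag is an assumption).
        try (LocalStackContainer localstack =
                new LocalStackContainer(DockerImageName.parse("localstack/localstack:2.3"))
                        .withServices(LocalStackContainer.Service.S3)) {
            localstack.start();
            // Point an AWS SDK v1 client (already on the classpath) at the container.
            AmazonS3 s3 = AmazonS3ClientBuilder.standard()
                    .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(
                            localstack.getEndpointOverride(LocalStackContainer.Service.S3).toString(),
                            localstack.getRegion()))
                    .withCredentials(new AWSStaticCredentialsProvider(
                            new BasicAWSCredentials(localstack.getAccessKey(), localstack.getSecretKey())))
                    .build();
            // Same bucket name as the awslocal command above.
            s3.createBucket("mybucket");
        }
    }
}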
diff --git a/conf/solr/9.3.0/solrconfig.xml b/conf/solr/9.3.0/solrconfig.xml
index b89315cdaa9..36ed4f23390 100644
--- a/conf/solr/9.3.0/solrconfig.xml
+++ b/conf/solr/9.3.0/solrconfig.xml
@@ -588,6 +588,7 @@
check for "Circuit Breakers tripped" in logs and the corresponding error message should tell
you what transpired (if the failure was caused by tripped circuit breakers).
-->
+
+
+
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
--- a/modules/dataverse-parent/pom.xml
+++ b/modules/dataverse-parent/pom.xml
- 6.0
+ 6.1
17
UTF-8
@@ -152,7 +152,7 @@
42.6.0
9.3.0
1.12.290
- 0.177.0
+ 26.30.0
8.0.0
diff --git a/pom.xml b/pom.xml
index 52bef929c1c..bf5bf16d423 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,7 +27,7 @@
war
1.2.18.4
- 9.21.2
+ 9.22.1
1.20.1
5.2.1
2.4.1
@@ -466,7 +466,7 @@
org.duracloud
common
- 7.1.1
+ 8.0.0
org.slf4j
@@ -481,7 +481,7 @@
org.duracloud
storeclient
- 7.1.1
+ 8.0.0
org.slf4j
@@ -612,6 +612,11 @@
3.0.0
test
+ <dependency>
+ <groupId>org.testcontainers</groupId>
+ <artifactId>localstack</artifactId>
+ <scope>test</scope>
+ </dependency>
docker-build
- 13
+ 16
gdcc/dataverse:${app.image.tag}
unstable
diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv
index a3a8e7efd58..ce481c1bf84 100644
--- a/scripts/api/data/metadatablocks/geospatial.tsv
+++ b/scripts/api/data/metadatablocks/geospatial.tsv
@@ -8,10 +8,10 @@
otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial
geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial
geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial
- westLongitude West Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
- eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
- northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
- southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
+ westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
+ eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
+ northLongitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
+ southLongitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial
#controlledVocabulary DatasetField Value identifier displayOrder
country Afghanistan 0
country Albania 1
diff --git a/scripts/installer/install.py b/scripts/installer/install.py
index 5a7b9f75696..18995695638 100644
--- a/scripts/installer/install.py
+++ b/scripts/installer/install.py
@@ -422,9 +422,13 @@
conn.close()
if int(pg_major_version) >= 15:
+ admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'"
+ conn = psycopg2.connect(admin_conn_string)
+ conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
+ cur = conn.cursor()
conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";"
- print("PostgreSQL 15 or higher detected. Running " + conn_cmd)
try:
+ print("PostgreSQL 15 or higher detected. Running " + conn_cmd)
cur.execute(conn_cmd)
except:
if force:
diff --git a/scripts/intellij/cpwebapp.sh b/scripts/intellij/cpwebapp.sh
new file mode 100755
index 00000000000..6ecad367048
--- /dev/null
+++ b/scripts/intellij/cpwebapp.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# cpwebapp
+#
+# Usage:
+#
+# Add a File watcher by importing watchers.xml into IntelliJ IDEA, and let it do the copying whenever you save a
+# file under webapp.
+#
+# https://www.jetbrains.com/help/idea/settings-tools-file-watchers.html
+#
+# Alternatively, you can add an External tool and trigger via menu or shortcut to do the copying manually:
+#
+# https://www.jetbrains.com/help/idea/configuring-third-party-tools.html
+#
+
+PROJECT_DIR=$1
+FILE_TO_COPY=$2
+RELATIVE_PATH="${FILE_TO_COPY#$PROJECT_DIR/}"
+
+# Check if RELATIVE_PATH starts with 'src/main/webapp', otherwise ignore
+if [[ $RELATIVE_PATH == src/main/webapp* ]]; then
+ # Get current version. Any other way to do this? A simple VERSION file would help.
+ VERSION=`perl -ne 'print $1 if /<revision>(.*?)<\/revision>/' ./modules/dataverse-parent/pom.xml`
+ RELATIVE_PATH_WITHOUT_WEBAPP="${RELATIVE_PATH#src/main/webapp/}"
+ TARGET_DIR=./docker-dev-volumes/glassfish/applications/dataverse-$VERSION
+ TARGET_PATH="${TARGET_DIR}/${RELATIVE_PATH_WITHOUT_WEBAPP}"
+
+ mkdir -p "$(dirname "$TARGET_PATH")"
+ cp "$FILE_TO_COPY" "$TARGET_PATH"
+
+ echo "File $FILE_TO_COPY copied to $TARGET_PATH"
+fi
diff --git a/scripts/intellij/watchers.xml b/scripts/intellij/watchers.xml
new file mode 100644
index 00000000000..e118fea558f
--- /dev/null
+++ b/scripts/intellij/watchers.xml
@@ -0,0 +1,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java
index 8c96f98ce39..363622ba3bf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java
@@ -2,6 +2,7 @@
package edu.harvard.iq.dataverse;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
+import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable {
@EJB
private SystemConfig systemConfig;
+ @EJB
+ StorageUseServiceBean storageUseService;
public AuxiliaryFile find(Object pk) {
return em.find(AuxiliaryFile.class, pk);
@@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile
}
dataFile.getAuxiliaryFiles().add(auxFile);
}
+ // We've just added this file to storage; increment the StorageUse
+ // record if needed.
+ if (auxFile.getFileSize() != null
+ && auxFile.getFileSize() > 0
+ && dataFile.getOwner() != null ) {
+ storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize());
+ }
} catch (IOException ioex) {
logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage());
throw new InternalServerErrorException();
@@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form
if (af == null) {
throw new FileNotFoundException();
}
+ Long auxFileSize = af.getFileSize();
em.remove(af);
StorageIO<DataFile> storageIO;
storageIO = dataFile.getStorageIO();
@@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form
if (storageIO.isAuxObjectCached(auxExtension)) {
storageIO.deleteAuxObject(auxExtension);
}
+ // We've just deleted this file from storage; update the StorageUse
+ // record if needed.
+ if (auxFileSize != null
+ && auxFileSize > 0
+ && dataFile.getOwner() != null) {
+ storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize));
+ }
+
}
public List<AuxiliaryFile> findAuxiliaryFiles(DataFile dataFile) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java
index 407282a5372..3d8086b142b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java
@@ -640,7 +640,7 @@ public String getFriendlySize() {
return BundleUtil.getStringFromBundle("file.sizeNotAvailable");
}
}
-
+
public boolean isRestricted() {
return restricted;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
index 332a39912d2..c9d50bbed9d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
@@ -8,6 +8,9 @@
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.storageuse.StorageQuota;
+import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
+import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -41,8 +44,6 @@
*
* @author Leonid Andreev
*
- * Basic skeleton of the new DataFile service for DVN 4.0
- *
*/
@Stateless
@@ -66,6 +67,9 @@ public class DataFileServiceBean implements java.io.Serializable {
@EJB SystemConfig systemConfig;
+ @EJB
+ StorageUseServiceBean storageUseService;
+
@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;
@@ -139,39 +143,6 @@ public class DataFileServiceBean implements java.io.Serializable {
*/
public static final String MIME_TYPE_PACKAGE_FILE = "application/vnd.dataverse.file-package";
- public class UserStorageQuota {
- private Long totalAllocatedInBytes = 0L;
- private Long totalUsageInBytes = 0L;
-
- public UserStorageQuota(Long allocated, Long used) {
- this.totalAllocatedInBytes = allocated;
- this.totalUsageInBytes = used;
- }
-
- public Long getTotalAllocatedInBytes() {
- return totalAllocatedInBytes;
- }
-
- public void setTotalAllocatedInBytes(Long totalAllocatedInBytes) {
- this.totalAllocatedInBytes = totalAllocatedInBytes;
- }
-
- public Long getTotalUsageInBytes() {
- return totalUsageInBytes;
- }
-
- public void setTotalUsageInBytes(Long totalUsageInBytes) {
- this.totalUsageInBytes = totalUsageInBytes;
- }
-
- public Long getRemainingQuotaInBytes() {
- if (totalUsageInBytes > totalAllocatedInBytes) {
- return 0L;
- }
- return totalAllocatedInBytes - totalUsageInBytes;
- }
- }
-
public DataFile find(Object pk) {
return em.find(DataFile.class, pk);
}
@@ -965,7 +936,7 @@ public boolean isThumbnailAvailable (DataFile file) {
}
// If thumbnails are not even supported for this class of files,
- // there's notthing to talk about:
+ // there's nothing to talk about:
if (!FileUtil.isThumbnailSupported(file)) {
return false;
}
@@ -980,16 +951,16 @@ public boolean isThumbnailAvailable (DataFile file) {
is more important...
*/
-
- if (ImageThumbConverter.isThumbnailAvailable(file)) {
- file = this.find(file.getId());
- file.setPreviewImageAvailable(true);
- this.save(file);
- return true;
- }
-
- return false;
+ file = this.find(file.getId());
+ if (ImageThumbConverter.isThumbnailAvailable(file)) {
+ file.setPreviewImageAvailable(true);
+ this.save(file);
+ return true;
+ }
+ file.setPreviewImageFail(true);
+ this.save(file);
+ return false;
}
@@ -1396,28 +1367,38 @@ public Embargo findEmbargo(Long id) {
return d.getEmbargo();
}
- public Long getStorageUsageByCreator(AuthenticatedUser user) {
- Query query = em.createQuery("SELECT SUM(o.filesize) FROM DataFile o WHERE o.creator.id=:creatorId");
-
- try {
- Long totalSize = (Long)query.setParameter("creatorId", user.getId()).getSingleResult();
- logger.info("total size for user: "+totalSize);
- return totalSize == null ? 0L : totalSize;
- } catch (NoResultException nre) { // ?
- logger.info("NoResultException, returning 0L");
- return 0L;
+ /**
+ * Checks if the supplied DvObjectContainer (Dataset or Collection; although
+ * only collection-level storage quotas are officially supported as of now)
+ * has a quota configured, and if not, keeps checking if any of the direct
+ * ancestor Collections further up have a configured quota. If it finds one,
+ * it will retrieve the current total content size for that specific ancestor
+ * dvObjectContainer and use it to define the quota limit for the upload
+ * session in progress.
+ *
+ * @param parent - DvObjectContainer, Dataset or Collection
+ * @return upload session size limit spec, or null if quota not defined on
+ * any of the ancestor DvObjectContainers
+ */
+ public UploadSessionQuotaLimit getUploadSessionQuotaLimit(DvObjectContainer parent) {
+ DvObjectContainer testDvContainer = parent;
+ StorageQuota quota = testDvContainer.getStorageQuota();
+ while (quota == null && testDvContainer.getOwner() != null) {
+ testDvContainer = testDvContainer.getOwner();
+ quota = testDvContainer.getStorageQuota();
+ if (quota != null) {
+ break;
+ }
+ }
+ if (quota == null || quota.getAllocation() == null) {
+ return null;
}
- }
-
- public UserStorageQuota getUserStorageQuota(AuthenticatedUser user, Dataset dataset) {
- // this is for testing only - one pre-set, installation-wide quota limit
- // for everybody:
- Long totalAllocated = systemConfig.getTestStorageQuotaLimit();
- // again, this is for testing only - we are only counting the total size
- // of all the files created by this user; it will likely be a much more
- // complex calculation in real life applications:
- Long totalUsed = getStorageUsageByCreator(user);
- return new UserStorageQuota(totalAllocated, totalUsed);
+ // Note that we are checking the recorded storage use not on the
+ // immediate parent necessarily, but on the specific ancestor
+ // DvObjectContainer on which the storage quota is defined:
+ Long currentSize = storageUseService.findStorageSizeByDvContainerId(testDvContainer.getId());
+
+ return new UploadSessionQuotaLimit(quota.getAllocation(), currentSize);
}
}
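A hypothetical caller sketch for the method above; the getRemainingQuotaInBytes accessor is assumed here (mirroring the UserStorageQuota class removed above), and the service reference and file size variable are placeholders:

UploadSessionQuotaLimit quotaLimit = fileService.getUploadSessionQuotaLimit(dataset);
if (quotaLimit != null && incomingFileSize > quotaLimit.getRemainingQuotaInBytes()) {
    // reject the upload: the configured storage quota for the collection would be exceeded
}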
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java
index a17d8c65138..95f3aed0f40 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java
@@ -112,6 +112,16 @@ public DataTable() {
@Column( nullable = true )
private String originalFileName;
+
+ /**
+ * The physical tab-delimited file is in storage with the list of variable
+ * names saved as the 1st line. This means that we do not need to generate
+ * this line on the fly. (This also means that the direct download mechanism can
+ * be used for this file!)
+ */
+ @Column(nullable = false)
+ private boolean storedWithVariableHeader = false;
+
/*
* Getter and Setter methods:
*/
@@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) {
this.originalFileName = originalFileName;
}
+ public boolean isStoredWithVariableHeader() {
+ return storedWithVariableHeader;
+ }
+
+ public void setStoredWithVariableHeader(boolean storedWithVariableHeader) {
+ this.storedWithVariableHeader = storedWithVariableHeader;
+ }
+
/*
* Custom overrides for hashCode(), equals() and toString() methods:
*/
diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java
index 245bdf0efd2..a2f560bc959 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java
@@ -35,6 +35,7 @@
import jakarta.persistence.TemporalType;
import edu.harvard.iq.dataverse.settings.JvmSettings;
+import edu.harvard.iq.dataverse.storageuse.StorageUse;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -189,6 +190,10 @@ public void setTemplate(Template template) {
}
public Dataset() {
+ this(false);
+ }
+
+ public Dataset(boolean isHarvested) {
DatasetVersion datasetVersion = new DatasetVersion();
datasetVersion.setDataset(this);
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
@@ -196,6 +201,11 @@ public Dataset() {
datasetVersion.setVersionNumber((long) 1);
datasetVersion.setMinorVersionNumber((long) 0);
versions.add(datasetVersion);
+
+ if (!isHarvested) {
+ StorageUse storageUse = new StorageUse(this);
+ this.setStorageUse(storageUse);
+ }
}
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
index b6c21014f04..610bb70ff49 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java
@@ -8,9 +8,7 @@
import edu.harvard.iq.dataverse.DatasetFieldType.FieldType;
import java.text.ParseException;
import java.text.SimpleDateFormat;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.GregorianCalendar;
+import java.util.*;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import jakarta.validation.ConstraintValidator;
@@ -34,7 +32,6 @@ public void initialize(ValidateDatasetFieldType constraintAnnotation) {
}
public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext context) {
-
context.disableDefaultConstraintViolation(); // we do this so we can have different messages depending on the different issue
boolean lengthOnly = false;
@@ -55,6 +52,38 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte
return true;
}
+ // verify no junk in individual fields and values are within range
+ if (dsfType.getName() != null && (dsfType.getName().equals(DatasetFieldConstant.northLatitude) || dsfType.getName().equals(DatasetFieldConstant.southLatitude) ||
+ dsfType.getName().equals(DatasetFieldConstant.westLongitude) || dsfType.getName().equals(DatasetFieldConstant.eastLongitude))) {
+ try {
+ verifyBoundingBoxCoordinatesWithinRange(dsfType.getName(), value.getValue());
+ } catch (IllegalArgumentException iae) {
+ try {
+ context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation();
+ } catch (NullPointerException e) {
+ }
+ return false;
+ }
+ }
+
+ // validate fields that are siblings and depend on each others values
+ if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null &&
+ value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().getValidationMessage() == null) {
+ Optional<String> failureMessage = validateChildConstraints(value.getDatasetField());
+ if (failureMessage.isPresent()) {
+ try {
+ context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " +
+ BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation();
+
+ // save the failure message in the parent so we don't keep validating the children
+ value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().setValidationMessage(failureMessage.get());
+
+ } catch (NullPointerException npe) {
+ }
+ return false;
+ }
+ }
+
if (fieldType.equals(FieldType.TEXT) && !lengthOnly && value.getDatasetField().getDatasetFieldType().getValidationFormat() != null) {
boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat());
if (!valid) {
@@ -216,4 +245,60 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) {
return pattern.matcher(userInput).matches();
}
+ // Validate child fields against each other and return failure message or Optional.empty() if success
+ public Optional<String> validateChildConstraints(DatasetField dsf) {
+ final String fieldName = dsf.getDatasetFieldType().getName() != null ? dsf.getDatasetFieldType().getName() : "";
+ Optional<String> returnFailureMessage = Optional.empty();
+
+ // Validate Child Constraint for Geospatial Bounding Box
+ // validate the four points of the box to insure proper layout
+ if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude)
+ || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) {
+ final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates";
+
+ try {
+ final Map<String, String> coords = new HashMap<>();
+ dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields().forEach(f -> {
+ coords.put(f.getDatasetFieldType().getName(), f.getValue());
+ });
+ if (!validateBoundingBox(coords.get(DatasetFieldConstant.westLongitude),
+ coords.get(DatasetFieldConstant.eastLongitude),
+ coords.get(DatasetFieldConstant.northLatitude),
+ coords.get(DatasetFieldConstant.southLatitude))) {
+ returnFailureMessage = Optional.of(failureMessage);
+ }
+ } catch (IllegalArgumentException e) { // IllegalArgumentException NumberFormatException
+ returnFailureMessage = Optional.of(failureMessage);
+ }
+ }
+
+ return returnFailureMessage;
+ }
+
+ public static boolean validateBoundingBox(final String westLon, final String eastLon, final String northLat, final String southLat) {
+ boolean returnVal = false;
+
+ try {
+ Float west = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.westLongitude, westLon);
+ Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon);
+ Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat);
+ Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat);
+ returnVal = west <= east && south <= north;
+ } catch (IllegalArgumentException e) {
+ returnVal = false;
+ }
+
+ return returnVal;
+ }
+
+ private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, final String value) throws IllegalArgumentException {
+ int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90;
+ int min = max * -1;
+
+ final Float returnVal = value != null ? Float.parseFloat(value) : Float.NaN;
+ if (returnVal.isNaN() || returnVal < min || returnVal > max) {
+ throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal.isNaN() ? "missing" : returnVal, min, max));
+ }
+ return returnVal;
+ }
}
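For illustration, the static helper added above accepts a bounding box only when all four values parse, fall within range, and satisfy west <= east and south <= north:

// true: all four values are in range, west <= east and south <= north
boolean ok = DatasetFieldValueValidator.validateBoundingBox("-73.5", "-69.9", "42.9", "41.2");
// false: east is west of west, so the box is rejected
boolean flipped = DatasetFieldValueValidator.validateBoundingBox("-69.9", "-73.5", "42.9", "41.2");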
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
index fc18257196d..b79f387f20b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -11,6 +11,9 @@
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.branding.BrandingUtil;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
+import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO;
+import edu.harvard.iq.dataverse.dataaccess.DataAccess;
+import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil;
@@ -361,6 +364,8 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) {
* other boolean.
*/
private boolean versionHasTabular = false;
+ private boolean versionHasGlobus = false;
+ private boolean globusTransferRequested = false;
private boolean showIngestSuccess;
@@ -506,7 +511,7 @@ public String getThumbnailString() {
thumbnailString = datasetThumbnail.getBase64image();
} else {
- thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset,
+ thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset,
workingVersion.getId(),
!workingVersion.isDraft(),
ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE);
@@ -754,17 +759,29 @@ public boolean isIndexedVersion() {
if (isIndexedVersion != null) {
return isIndexedVersion;
}
+
+ // Just like on the collection page, facets on the Dataset page can be
+ // disabled instance-wide by an admin:
+ if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) {
+ return isIndexedVersion = false;
+ }
+
// The version is SUPPOSED to be indexed if it's the latest published version, or a
- // draft. So if none of the above is true, we return false right away:
-
+ // draft. So if none of the above is true, we can return false right away.
if (!(workingVersion.isDraft() || isThisLatestReleasedVersion())) {
return isIndexedVersion = false;
}
-
- // ... but if it is the latest published version or a draft, we want to test
- // and confirm that this version *has* actually been indexed and is searchable
- // (and that solr is actually up and running!), by running a quick solr search:
- return isIndexedVersion = isThisVersionSearchable();
+ // If this is the latest published version, we want to confirm that this
+ // version was successfully indexed after the last publication
+
+ if (isThisLatestReleasedVersion()) {
+ return isIndexedVersion = (workingVersion.getDataset().getIndexTime() != null)
+ && workingVersion.getDataset().getIndexTime().after(workingVersion.getReleaseTime());
+ }
+
+ // Drafts don't have the indextime stamps set/incremented when indexed,
+ // so we'll just assume it is indexed, and will then hope for the best.
+ return isIndexedVersion = true;
}
/**
@@ -820,8 +837,18 @@ public List getFileTagsFacetLabels() {
/**
* Verifies that solr is running and that the version is indexed and searchable
* @return boolean
- */
+ * Commenting out this method for now, since we have decided it was not
+ * necessary to query solr just to figure out if we can query solr. We will
+ * rely solely on the latest-released status and the indexed timestamp from
+ * the database for that. - L.A.
+ *
public boolean isThisVersionSearchable() {
+ // Just like on the collection page, facets on the Dataset page can be
+ // disabled instance-wide by an admin:
+ if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) {
+ return false;
+ }
+
SolrQuery solrQuery = new SolrQuery();
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, workingVersion.getDataset().getId().toString()));
@@ -856,6 +883,7 @@ public boolean isThisVersionSearchable() {
return false;
}
+ */
/**
* Finds the list of numeric datafile ids in the Version specified, by running
@@ -967,10 +995,19 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte
logger.fine("Remote Solr Exception: " + ex.getLocalizedMessage());
String msg = ex.getLocalizedMessage();
if (msg.contains(SearchFields.FILE_DELETED)) {
+ // This is a backward compatibility hook put in place many versions
+ // ago, to accommodate instances running Solr with schemas that
+ // don't include this flag yet. Running Solr with an up-to-date
+ // schema has been a hard requirement for a while now; should we
+ // remove it at this point? - L.A.
fileDeletedFlagNotIndexed = true;
+ } else {
+ isIndexedVersion = false;
+ return resultIds;
}
} catch (Exception ex) {
logger.warning("Solr exception: " + ex.getLocalizedMessage());
+ isIndexedVersion = false;
return resultIds;
}
@@ -983,6 +1020,7 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte
queryResponse = solrClientService.getSolrClient().query(solrQuery);
} catch (Exception ex) {
logger.warning("Caught a Solr exception (again!): " + ex.getLocalizedMessage());
+ isIndexedVersion = false;
return resultIds;
}
}
@@ -2150,10 +2188,19 @@ private String init(boolean initFull) {
// the total "originals" size of the dataset with direct custom queries;
// then we'll be able to drop the lookup hint for DataTable from the
// findDeep() method for the version and further speed up the lookup
- // a little bit.
+ // a little bit.
+ boolean globusDownloadEnabled = systemConfig.isGlobusDownload();
for (FileMetadata fmd : workingVersion.getFileMetadatas()) {
- if (fmd.getDataFile().isTabularData()) {
+ DataFile df = fmd.getDataFile();
+ if (df.isTabularData()) {
versionHasTabular = true;
+ }
+ if(globusDownloadEnabled) {
+ if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) {
+ versionHasGlobus= true;
+ }
+ }
+ if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) {
break;
}
}
@@ -2450,6 +2497,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p
public boolean isVersionHasTabular() {
return versionHasTabular;
}
+
+ public boolean isVersionHasGlobus() {
+ return versionHasGlobus;
+ }
public boolean isReadOnly() {
return readOnly;
@@ -3056,6 +3107,26 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo
this.selectedNonDownloadableFiles = selectedNonDownloadableFiles;
}
+ private List<FileMetadata> selectedGlobusTransferableFiles;
+
+ public List<FileMetadata> getSelectedGlobusTransferableFiles() {
+ return selectedGlobusTransferableFiles;
+ }
+
+ public void setSelectedGlobusTransferableFiles(List<FileMetadata> selectedGlobusTransferableFiles) {
+ this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles;
+ }
+
+ private List<FileMetadata> selectedNonGlobusTransferableFiles;
+
+ public List<FileMetadata> getSelectedNonGlobusTransferableFiles() {
+ return selectedNonGlobusTransferableFiles;
+ }
+
+ public void setSelectedNonGlobusTransferableFiles(List<FileMetadata> selectedNonGlobusTransferableFiles) {
+ this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles;
+ }
+
public String getSizeOfDataset() {
return DatasetUtil.getDownloadSize(workingVersion, false);
}
@@ -3167,7 +3238,7 @@ private void startDownload(boolean downloadOriginal){
boolean guestbookRequired = isDownloadPopupRequired();
boolean validate = validateFilesForDownload(downloadOriginal);
if (validate) {
- updateGuestbookResponse(guestbookRequired, downloadOriginal);
+ updateGuestbookResponse(guestbookRequired, downloadOriginal, false);
if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){
startMultipleFileDownload();
}
@@ -3214,8 +3285,8 @@ public boolean validateFilesForDownload(boolean downloadOriginal){
}
}
- //if there are two or more files with a total size
- //over the zip limit post a "too large" popup
+ //if there are two or more files, with a total size
+ //over the zip limit, post a "too large" popup
if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) {
setValidateFilesOutcome("FailSize");
return false;
@@ -3224,16 +3295,18 @@ public boolean validateFilesForDownload(boolean downloadOriginal){
// If some of the files were restricted and we had to drop them off the
// list, and NONE of the files are left on the downloadable list
// - we show them a "you're out of luck" popup:
- if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) {
+ if (getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) {
setValidateFilesOutcome("FailRestricted");
return false;
}
- if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) {
+ //Some are selected and there are non-downloadable ones or there are both downloadable and globus transferable files
+ if ((!(getSelectedDownloadableFiles().isEmpty() && getSelectedGlobusTransferableFiles().isEmpty())
+ && (!getSelectedNonDownloadableFiles().isEmpty()) || (!getSelectedDownloadableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) {
setValidateFilesOutcome("Mixed");
return true;
}
-
+ //ToDo - should Mixed not trigger this?
if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) {
setValidateFilesOutcome("GuestbookRequired");
}
@@ -3241,15 +3314,23 @@ public boolean validateFilesForDownload(boolean downloadOriginal){
}
- private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) {
+ private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) {
// Note that the GuestbookResponse object may still have information from
// the last download action performed by the user. For example, it may
// still have the non-null Datafile in it, if the user has just downloaded
// a single file; or it may still have the format set to "original" -
// even if that's not what they are trying to do now.
// So make sure to reset these values:
- guestbookResponse.setDataFile(null);
- guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString());
+ if(fileMetadataForAction == null) {
+ guestbookResponse.setDataFile(null);
+ } else {
+ guestbookResponse.setDataFile(fileMetadataForAction.getDataFile());
+ }
+ if(isGlobusTransfer) {
+ guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles()));
+ } else {
+ guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString());
+ }
if (downloadOriginal) {
guestbookResponse.setFileFormat("original");
} else {
@@ -3269,15 +3350,32 @@ private boolean filterSelectedFiles(){
setSelectedNonDownloadableFiles(new ArrayList<>());
setSelectedRestrictedFiles(new ArrayList<>());
setSelectedUnrestrictedFiles(new ArrayList<>());
+ setSelectedGlobusTransferableFiles(new ArrayList<>());
+ setSelectedNonGlobusTransferableFiles(new ArrayList<>());
boolean someFiles = false;
+ boolean globusDownloadEnabled = settingsWrapper.isGlobusDownload();
for (FileMetadata fmd : this.selectedFiles){
- if(this.fileDownloadHelper.canDownloadFile(fmd)){
+ boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd);
+
+ boolean globusTransferable = false;
+ if(globusDownloadEnabled) {
+ String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier());
+ globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId);
+ downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId);
+ }
+ if(downloadable){
getSelectedDownloadableFiles().add(fmd);
someFiles=true;
} else {
getSelectedNonDownloadableFiles().add(fmd);
}
+ if(globusTransferable) {
+ getSelectedGlobusTransferableFiles().add(fmd);
+ someFiles=true;
+ } else {
+ getSelectedNonGlobusTransferableFiles().add(fmd);
+ }
if(fmd.isRestricted()){
getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not
someFiles=true;
@@ -5215,35 +5313,6 @@ public boolean isFileAccessRequestMultiButtonEnabled(){
return false;
}
- private Boolean downloadButtonAllEnabled = null;
-
- public boolean isDownloadAllButtonEnabled() {
-
- if (downloadButtonAllEnabled == null) {
- for (FileMetadata fmd : workingVersion.getFileMetadatas()) {
- if (!this.fileDownloadHelper.canDownloadFile(fmd)) {
- downloadButtonAllEnabled = false;
- break;
- }
- }
- downloadButtonAllEnabled = true;
- }
- return downloadButtonAllEnabled;
- }
-
- public boolean isDownloadSelectedButtonEnabled(){
-
- if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){
- return false;
- }
- for (FileMetadata fmd : this.selectedFiles){
- if (this.fileDownloadHelper.canDownloadFile(fmd)){
- return true;
- }
- }
- return false;
- }
-
public boolean isFileAccessRequestMultiSignUpButtonRequired(){
if (isSessionUserAuthenticated()){
return false;
@@ -5910,14 +5979,7 @@ public void setFolderPresort(boolean folderPresort) {
public void explore(ExternalTool externalTool) {
ApiToken apiToken = null;
User user = session.getUser();
- if (user instanceof AuthenticatedUser) {
- apiToken = authService.findApiTokenByUser((AuthenticatedUser) user);
- } else if (user instanceof PrivateUrlUser) {
- PrivateUrlUser privateUrlUser = (PrivateUrlUser) user;
- PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId());
- apiToken = new ApiToken();
- apiToken.setTokenString(privUrl.getToken());
- }
+ apiToken = authService.getValidApiTokenForUser(user);
ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode());
PrimeFaces.current().executeScript(externalToolHandler.getExploreScript());
}
@@ -5925,8 +5987,9 @@ public void explore(ExternalTool externalTool) {
public void configure(ExternalTool externalTool) {
ApiToken apiToken = null;
User user = session.getUser();
+ //Not enabled for PrivateUrlUsers (who wouldn't have write permissions anyway)
if (user instanceof AuthenticatedUser) {
- apiToken = authService.findApiTokenByUser((AuthenticatedUser) user);
+ apiToken = authService.getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user);
}
ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode());
PrimeFaces.current().executeScript(externalToolHandler.getConfigureScript());
@@ -6250,18 +6313,49 @@ public boolean isHasPublicStore() {
return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId()));
}
- public void startGlobusTransfer() {
- ApiToken apiToken = null;
- User user = session.getUser();
- if (user instanceof AuthenticatedUser) {
- apiToken = authService.findApiTokenByUser((AuthenticatedUser) user);
- } else if (user instanceof PrivateUrlUser) {
- PrivateUrlUser privateUrlUser = (PrivateUrlUser) user;
- PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId());
- apiToken = new ApiToken();
- apiToken.setTokenString(privUrl.getToken());
- }
- PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken));
+ public boolean isGlobusTransferRequested() {
+ return globusTransferRequested;
+ }
+
+ /**
+ * Analogous to the startDownload method, this method is called when the user
+ * tries to start a Globus transfer out (~download). The
+ * validateFilesForDownload call checks to see if there are some files that can
+ * be Globus transferred and, if so and there are no files that can't be
+ * transferred, this method will launch the globus transfer app. If there is a
+ * mix of files or if the guestbook popup is required, the method passes back to
+ * the UI so those popup(s) can be shown. Once they are, this method is called
+ * with the popupShown param true and the app will be shown.
+ *
+ * @param transferAll - when called from the dataset Access menu, this should be
+ * true so that all files are included in the processing.
+ * When it is called from the file table, the current
+ * selection is used and the param should be false.
+ * @param popupShown - This method is called twice if the mixed files or
+ * guestbook popups are needed. On the first call, popupShown
+ * is false so that the transfer is not started and those
+ * popups can be shown. On the second call, popupShown is
+ * true and processing will occur as long as there are some
+ * valid files to transfer.
+ */
+ public void startGlobusTransfer(boolean transferAll, boolean popupShown) {
+ if (transferAll) {
+ this.setSelectedFiles(workingVersion.getFileMetadatas());
+ }
+ boolean guestbookRequired = isDownloadPopupRequired();
+
+ boolean validated = validateFilesForDownload(true);
+ if (validated) {
+ globusTransferRequested = true;
+ boolean mixed = "Mixed".equals(getValidateFilesOutcome());
+ // update the guestbook response for a Globus transfer (not a download)
+ updateGuestbookResponse(guestbookRequired, true, true);
+ if ((!guestbookRequired && !mixed) || popupShown) {
+ boolean doNotSaveGuestbookResponse = workingVersion.isDraft();
+ globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse);
+ globusTransferRequested = false;
+ }
+ }
}
public String getWebloaderUrlForDataset(Dataset d) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
index 78fd896c897..99c3c65e3b8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java
@@ -260,22 +260,27 @@ private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion,
return criteriaBuilder.and(predicates.toArray(new Predicate[]{}));
}
- private Order createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder,
- FileOrderCriteria orderCriteria,
- Root fileMetadataRoot) {
+ private List<Order> createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder,
+ FileOrderCriteria orderCriteria,
+ Root<FileMetadata> fileMetadataRoot) {
Path label = fileMetadataRoot.get("label");
Path dataFile = fileMetadataRoot.get("dataFile");
Path publicationDate = dataFile.get("publicationDate");
Path createDate = dataFile.get("createDate");
Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate);
- return switch (orderCriteria) {
- case NameZA -> criteriaBuilder.desc(label);
- case Newest -> criteriaBuilder.desc(orderByLifetimeExpression);
- case Oldest -> criteriaBuilder.asc(orderByLifetimeExpression);
- case Size -> criteriaBuilder.asc(dataFile.get("filesize"));
- case Type -> criteriaBuilder.asc(dataFile.get("contentType"));
- default -> criteriaBuilder.asc(label);
- };
+ List<Order> orderList = new ArrayList<>();
+ switch (orderCriteria) {
+ case NameZA -> orderList.add(criteriaBuilder.desc(label));
+ case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression));
+ case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression));
+ case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize")));
+ case Type -> {
+ orderList.add(criteriaBuilder.asc(dataFile.get("contentType")));
+ orderList.add(criteriaBuilder.asc(label));
+ }
+ default -> orderList.add(criteriaBuilder.asc(label));
+ }
+ return orderList;
}
private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
index cd3291e6222..1ee517c9831 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -825,7 +825,7 @@ public Long getThumbnailByVersionId(Long versionId) {
+ "AND df.id = o.id "
+ "AND fm.datasetversion_id = dv.id "
+ "AND fm.datafile_id = df.id "
- // + "AND o.previewImageAvailable = false "
+ + "AND o.previewimagefail = false "
+ "AND df.restricted = false "
+ "AND df.embargo_id is null "
+ "AND df.contenttype LIKE 'image/%' "
@@ -859,7 +859,7 @@ public Long getThumbnailByVersionId(Long versionId) {
+ "AND df.id = o.id "
+ "AND fm.datasetversion_id = dv.id "
+ "AND fm.datafile_id = df.id "
- // + "AND o.previewImageAvailable = false "
+ + "AND o.previewimagefail = false "
+ "AND df.restricted = false "
+ "AND df.embargo_id is null "
+ "AND df.contenttype = 'application/pdf' "
diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
index 682c1dc6744..c1de9d63410 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
@@ -3,6 +3,7 @@
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
+import edu.harvard.iq.dataverse.storageuse.StorageUse;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -103,7 +104,11 @@ public enum DataverseType {
* dataverses.
*/
protected boolean permissionRoot;
-
+
+ public Dataverse() {
+ StorageUse storageUse = new StorageUse(this);
+ this.setStorageUse(storageUse);
+ }
public DataverseType getDataverseType() {
return dataverseType;
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java
index c4749be0cb3..a3425987bf8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java
@@ -30,8 +30,9 @@
@NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdDatasetFieldTypeId",
query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id = :datasetFieldTypeId"),
@NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdAndDatasetFieldTypeIdList",
- query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList")
-
+ query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList"),
+ @NamedQuery(name = "DataverseFieldTypeInputLevel.findRequiredByDataverseId",
+ query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.required = 'true' ")
})
@Table(name="DataverseFieldTypeInputLevel"
, uniqueConstraints={
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java
index 66c700f59ce..1bd290ecc4d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java
@@ -88,6 +88,16 @@ public DataverseFieldTypeInputLevel findByDataverseIdDatasetFieldTypeId(Long dat
return null;
}
}
+
+ public List<DataverseFieldTypeInputLevel> findRequiredByDataverseId(Long dataverseId) {
+ Query query = em.createNamedQuery("DataverseFieldTypeInputLevel.findRequiredByDataverseId", DataverseFieldTypeInputLevel.class);
+ query.setParameter("dataverseId", dataverseId);
+ try{
+ return query.getResultList();
+ } catch ( NoResultException nre ) {
+ return null;
+ }
+ }
public void delete(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel) {
em.remove(em.merge(dataverseFieldTypeInputLevel));
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
index 943a74327d5..3dbc22902b0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
@@ -362,7 +362,7 @@ public void initFeaturedDataverses() {
List<Dataverse> featuredSource = new ArrayList<>();
List<Dataverse> featuredTarget = new ArrayList<>();
featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId()));
- featuredSource.addAll(linkingService.findLinkingDataverses(dataverse.getId()));
+ featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId()));
List<DataverseFeaturedDataverse> featuredList = featuredDataverseService.findByDataverseId(dataverse.getId());
for (DataverseFeaturedDataverse dfd : featuredList) {
Dataverse fd = dfd.getFeaturedDataverse();
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 549b8310122..10b5d800c21 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -18,8 +18,11 @@
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.storageuse.StorageQuota;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
@@ -42,7 +45,15 @@
import jakarta.persistence.NonUniqueResultException;
import jakarta.persistence.PersistenceContext;
import jakarta.persistence.TypedQuery;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrServerException;
+import org.everit.json.schema.Schema;
+import org.everit.json.schema.ValidationException;
+import org.everit.json.schema.loader.SchemaLoader;
+import org.json.JSONObject;
+import org.json.JSONTokener;
/**
*
@@ -80,6 +91,9 @@ public class DataverseServiceBean implements java.io.Serializable {
@EJB
PermissionServiceBean permissionService;
+ @EJB
+ DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService;
+
@EJB
SystemConfig systemConfig;
@@ -346,51 +360,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) {
}
return null;
}
-
- /*
- public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) {
- if (dataverse == null) {
- return false;
- }
-
- // First, check if the dataverse has a defined logo:
-
- //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) {
- File dataverseLogoFile = getLogo(dataverse);
- if (dataverseLogoFile != null) {
- String logoThumbNailPath = null;
-
- if (dataverseLogoFile.exists()) {
- logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48);
- if (logoThumbNailPath != null) {
- return true;
- }
- }
- }
- //}
- */
- // If there's no uploaded logo for this dataverse, go through its
- // [released] datasets and see if any of them have card images:
- //
- // TODO:
- // Discuss/Decide if we really want to do this - i.e., go through every
- // file in every dataset below...
- // -- L.A. 4.0 beta14
- /*
- for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) {
- if (dataset != null) {
- DatasetVersion releasedVersion = dataset.getReleasedVersion();
-
- if (releasedVersion != null) {
- if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) {
- return true;
- }
- }
- }
- } */
- /*
- return false;
- } */
private File getLogo(Dataverse dataverse) {
if (dataverse.getId() == null) {
@@ -919,5 +888,294 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) {
return em.createNativeQuery(cqString).getResultList();
}
+
+ public String getCollectionDatasetSchema(String dataverseAlias) {
+
+ Dataverse testDV = this.findByAlias(dataverseAlias);
+
+ while (!testDV.isMetadataBlockRoot()) {
+ if (testDV.getOwner() == null) {
+ break; // we are at the root, which by definition is metadata block root, regardless of the value
+ }
+ testDV = testDV.getOwner();
+ }
+
+ /* Couldn't get the 'return the base schema if there are no extra required fields' case to work with the path provided;
+ leaving it as 'out of scope' for now. SEK 11/27/2023
+
+ List required = new ArrayList<>();
+
+ required = dataverseFieldTypeInputLevelService.findRequiredByDataverseId(testDV.getId());
+
+ if (required == null || required.isEmpty()){
+ String pathToJsonFile = "src/main/resources/edu/harvas/iq/dataverse/baseDatasetSchema.json";
+ String baseSchema = getBaseSchemaStringFromFile(pathToJsonFile);
+ if (baseSchema != null && !baseSchema.isEmpty()){
+ return baseSchema;
+ }
+ }
+
+ */
+ List<MetadataBlock> selectedBlocks = new ArrayList<>();
+ List<DatasetFieldType> requiredDSFT = new ArrayList<>();
+
+ selectedBlocks.addAll(testDV.getMetadataBlocks());
+
+ for (MetadataBlock mdb : selectedBlocks) {
+ for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) {
+ if (!dsft.isChild()) {
+ DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId());
+ if (dsfIl != null) {
+ dsft.setRequiredDV(dsfIl.isRequired());
+ dsft.setInclude(dsfIl.isInclude());
+ } else {
+ dsft.setRequiredDV(dsft.isRequired());
+ dsft.setInclude(true);
+ }
+ if (dsft.isHasChildren()) {
+ for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) {
+ DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId());
+ if (dsfIlChild != null) {
+ child.setRequiredDV(dsfIlChild.isRequired());
+ child.setInclude(dsfIlChild.isInclude());
+ } else {
+ // in the case of conditionally required (child = true, parent = false)
+ // we set this to false; i.e this is the default "don't override" value
+ child.setRequiredDV(child.isRequired() && dsft.isRequired());
+ child.setInclude(true);
+ }
+ }
+ }
+ if(dsft.isRequiredDV()){
+ requiredDSFT.add(dsft);
+ }
+ }
+ }
+
+ }
+
+ String reqMDBNames = "";
+ List<MetadataBlock> hasReqFields = new ArrayList<>();
+ String retval = datasetSchemaPreface;
+ for (MetadataBlock mdb : selectedBlocks) {
+ for (DatasetFieldType dsft : requiredDSFT) {
+ if (dsft.getMetadataBlock().equals(mdb)) {
+ hasReqFields.add(mdb);
+ if (!reqMDBNames.isEmpty()) reqMDBNames += ",";
+ reqMDBNames += "\"" + mdb.getName() + "\"";
+ break;
+ }
+ }
+ }
+ int countMDB = 0;
+ for (MetadataBlock mdb : hasReqFields) {
+ if (countMDB>0){
+ retval += ",";
+ }
+ retval += getCustomMDBSchema(mdb, requiredDSFT);
+ countMDB++;
+ }
+
+ retval += "\n }";
+
+ retval += endOfjson.replace("blockNames", reqMDBNames);
+
+ return retval;
+
+ }
+
+ private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){
+ String retval = "";
+ boolean mdbHasReqField = false;
+ int numReq = 0;
+ List<DatasetFieldType> requiredThisMDB = new ArrayList<>();
+
+ for (DatasetFieldType dsft : requiredDSFT ){
+
+ if(dsft.getMetadataBlock().equals(mdb)){
+ numReq++;
+ mdbHasReqField = true;
+ requiredThisMDB.add(dsft);
+ }
+ }
+ if (mdbHasReqField){
+ retval += startOfMDB.replace("blockName", mdb.getName());
+
+ retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size()));
+ int count = 0;
+ for (DatasetFieldType dsft:requiredThisMDB ){
+ count++;
+ String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName());
+ if (count < requiredThisMDB.size()){
+ retval += reqValImp + "\n";
+ } else {
+ reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1);
+ retval += reqValImp+ "\n";
+ retval += endOfReqVal;
+ }
+ }
+
+ }
+
+ return retval;
+ }
+
+ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) {
+ JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias)));
+
+ try {
+ Schema schema = SchemaLoader.load(rawSchema);
+ schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid
+ } catch (ValidationException vx) {
+ logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage());
+ String accumulatedexceptions = "";
+ for (ValidationException va : vx.getCausingExceptions()){
+ accumulatedexceptions = accumulatedexceptions + va;
+ accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " ");
+ }
+ if (!accumulatedexceptions.isEmpty()){
+ return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions;
+ } else {
+ return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage();
+ }
+
+ } catch (Exception ex) {
+ logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage());
+ return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage();
+ }
+
+ return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded");
+ }
+
+ static String getBaseSchemaStringFromFile(String pathToJsonFile) {
+ File datasetSchemaJson = new File(pathToJsonFile);
+ try {
+ String datasetSchemaAsJson = new String(Files.readAllBytes(Paths.get(datasetSchemaJson.getAbsolutePath())));
+ return datasetSchemaAsJson;
+ } catch (IOException ex) {
+ logger.info("IO - failed to get schema file - will build on fly " +ex.getMessage());
+ return null;
+ } catch (Exception e){
+ logger.info("Other exception - failed to get schema file - will build on fly. " + e.getMessage());
+ return null;
+ }
+ }
+
+ private String datasetSchemaPreface =
+ "{\n" +
+ " \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" +
+ " \"$defs\": {\n" +
+ " \"field\": {\n" +
+ " \"type\": \"object\",\n" +
+ " \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" +
+ " \"properties\": {\n" +
+ " \"value\": {\n" +
+ " \"anyOf\": [\n" +
+ " {\n" +
+ " \"type\": \"array\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"type\": \"string\"\n" +
+ " },\n" +
+ " {\n" +
+ " \"$ref\": \"#/$defs/field\"\n" +
+ " }\n" +
+ " ]\n" +
+ " },\n" +
+ " \"typeClass\": {\n" +
+ " \"type\": \"string\"\n" +
+ " },\n" +
+ " \"multiple\": {\n" +
+ " \"type\": \"boolean\"\n" +
+ " },\n" +
+ " \"typeName\": {\n" +
+ " \"type\": \"string\"\n" +
+ " }\n" +
+ " }\n" +
+ " }\n" +
+ "},\n" +
+ "\"type\": \"object\",\n" +
+ "\"properties\": {\n" +
+ " \"datasetVersion\": {\n" +
+ " \"type\": \"object\",\n" +
+ " \"properties\": {\n" +
+ " \"license\": {\n" +
+ " \"type\": \"object\",\n" +
+ " \"properties\": {\n" +
+ " \"name\": {\n" +
+ " \"type\": \"string\"\n" +
+ " },\n" +
+ " \"uri\": {\n" +
+ " \"type\": \"string\",\n" +
+ " \"format\": \"uri\"\n" +
+ " }\n" +
+ " },\n" +
+ " \"required\": [\"name\", \"uri\"]\n" +
+ " },\n" +
+ " \"metadataBlocks\": {\n" +
+ " \"type\": \"object\",\n" +
+ " \"properties\": {\n" +
+ "" ;
+ private String startOfMDB = "" +
+" \"blockName\": {\n" +
+" \"type\": \"object\",\n" +
+" \"properties\": {\n" +
+" \"fields\": {\n" +
+" \"type\": \"array\",\n" +
+" \"items\": {\n" +
+" \"$ref\": \"#/$defs/field\"\n" +
+" },";
+
+ private String reqValTemplate = " {\n" +
+" \"contains\": {\n" +
+" \"properties\": {\n" +
+" \"typeName\": {\n" +
+" \"const\": \"reqFieldTypeName\"\n" +
+" }\n" +
+" }\n" +
+" }\n" +
+" },";
+
+ private String minItemsTemplate = "\n \"minItems\": numMinItems,\n" +
+" \"allOf\": [\n";
+ private String endOfReqVal = " ]\n" +
+" }\n" +
+" },\n" +
+" \"required\": [\"fields\"]\n" +
+" }";
+
+ private String endOfjson = ",\n" +
+" \"required\": [blockNames]\n" +
+" }\n" +
+" },\n" +
+" \"required\": [\"metadataBlocks\"]\n" +
+" }\n" +
+" },\n" +
+" \"required\": [\"datasetVersion\"]\n" +
+"}\n";
+
+ public void saveStorageQuota(Dataverse target, Long allocation) {
+ StorageQuota storageQuota = target.getStorageQuota();
+
+ if (storageQuota != null) {
+ storageQuota.setAllocation(allocation);
+ em.merge(storageQuota);
+ } else {
+ storageQuota = new StorageQuota();
+ storageQuota.setDefinitionPoint(target);
+ storageQuota.setAllocation(allocation);
+ target.setStorageQuota(storageQuota);
+ em.persist(storageQuota);
+ }
+ em.flush();
+ }
+
+ public void disableStorageQuota(StorageQuota storageQuota) {
+ if (storageQuota != null && storageQuota.getAllocation() != null) {
+ storageQuota.setAllocation(null);
+ em.merge(storageQuota);
+ em.flush();
+ }
+ }
}
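For reference, a minimal, self-contained sketch of the everit-json-schema validation pattern that isDatasetJsonValid() above relies on. The schema string and input below are placeholders for illustration, not the schema Dataverse actually generates:

```java
import org.everit.json.schema.Schema;
import org.everit.json.schema.ValidationException;
import org.everit.json.schema.loader.SchemaLoader;
import org.json.JSONObject;
import org.json.JSONTokener;

public class SchemaValidationSketch {
    public static void main(String[] args) {
        // Placeholder schema: a JSON object that must contain "datasetVersion"
        String schemaJson = "{\"type\": \"object\", \"required\": [\"datasetVersion\"]}";
        String input = "{\"title\": \"missing datasetVersion\"}";

        // Load the schema and validate the input, as isDatasetJsonValid() does
        Schema schema = SchemaLoader.load(new JSONObject(new JSONTokener(schemaJson)));
        try {
            schema.validate(new JSONObject(input)); // throws ValidationException if invalid
            System.out.println("valid");
        } catch (ValidationException vx) {
            // Top-level message plus any nested per-field violations
            System.out.println(vx.getErrorMessage());
            vx.getCausingExceptions().forEach(c -> System.out.println("  " + c.getMessage()));
        }
    }
}
```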
diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java
index 9e7f3f3fe96..cc5d7620969 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java
@@ -2,6 +2,7 @@
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
+import edu.harvard.iq.dataverse.storageuse.StorageQuota;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
@@ -155,7 +156,7 @@ public String visit(DataFile df) {
private String identifier;
private boolean identifierRegistered;
-
+
private transient GlobalId globalId = null;
@OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true)
@@ -177,6 +178,9 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments;
+
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
index f7d361d76f5..82057315fbb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java
@@ -2,11 +2,13 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.settings.JvmSettings;
+import edu.harvard.iq.dataverse.storageuse.StorageUse;
import edu.harvard.iq.dataverse.util.SystemConfig;
-import java.util.Locale;
+import jakarta.persistence.CascadeType;
import java.util.Optional;
import jakarta.persistence.MappedSuperclass;
+import jakarta.persistence.OneToOne;
import org.apache.commons.lang3.StringUtils;
/**
@@ -40,6 +42,9 @@ public boolean isEffectivelyPermissionRoot() {
private String metadataLanguage=null;
private Boolean guestbookAtRequest = null;
+
+ @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true)
+ private StorageUse storageUse;
public String getEffectiveStorageDriverId() {
String id = storageDriver;
@@ -160,5 +165,14 @@ public String getCurationLabelSetName() {
public void setCurationLabelSetName(String setName) {
this.externalLabelSetName = setName;
}
-
+
+ /**
+ * Should only be used in constructors for DvObjectContainers (Datasets and
+ * Collections), to make sure new entries are created and persisted in the
+ * database StorageUse table for every DvObject container we create.
+ * @param storageUse
+ */
+ public void setStorageUse(StorageUse storageUse) {
+ this.storageUse = storageUse;
+ }
}
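As the javadoc above notes, setStorageUse() is intended to be called only from DvObjectContainer constructors. A sketch of what such a constructor might look like; the StorageUse(DvObjectContainer) constructor used here is an assumption for illustration, not confirmed by this diff:

```java
// Hypothetical constructor for a DvObjectContainer subclass (e.g. Dataset).
// Assumes StorageUse exposes a constructor taking the owning container;
// adjust to the actual edu.harvard.iq.dataverse.storageuse.StorageUse API.
public Dataset() {
    // ... other initialization ...
    this.setStorageUse(new StorageUse(this)); // ensure a StorageUse row is persisted along with the container
}
```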
diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
index a942830b19e..a6f31e24764 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java
@@ -2,7 +2,6 @@
import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean;
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
-import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota;
import edu.harvard.iq.dataverse.api.AbstractApiBean;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.Permission;
@@ -38,6 +37,7 @@
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.Setting;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.JsfHelper;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -206,7 +206,7 @@ public enum Referrer {
private final int NUMBER_OF_SCROLL_ROWS = 25;
private DataFile singleFile = null;
- private UserStorageQuota userStorageQuota = null;
+ private UploadSessionQuotaLimit uploadSessionQuota = null;
public DataFile getSingleFile() {
return singleFile;
@@ -359,7 +359,7 @@ public String getHumanMaxTotalUploadSizeInBytes() {
}
public boolean isStorageQuotaEnforced() {
- return userStorageQuota != null;
+ return uploadSessionQuota != null;
}
public Long getMaxIngestSizeInBytes() {
@@ -530,8 +530,10 @@ public String initCreateMode(String modeToken, DatasetVersion version, MutableBo
this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId());
if (systemConfig.isStorageQuotasEnforced()) {
- this.userStorageQuota = datafileService.getUserStorageQuota((AuthenticatedUser) session.getUser(), dataset);
- this.maxTotalUploadSizeInBytes = userStorageQuota.getRemainingQuotaInBytes();
+ this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset);
+ if (this.uploadSessionQuota != null) {
+ this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes();
+ }
} else {
this.maxTotalUploadSizeInBytes = null;
}
@@ -547,7 +549,7 @@ public String initCreateMode(String modeToken, DatasetVersion version, MutableBo
}
public boolean isQuotaExceeded() {
- return systemConfig.isStorageQuotasEnforced() && userStorageQuota != null && userStorageQuota.getRemainingQuotaInBytes() == 0;
+ return systemConfig.isStorageQuotasEnforced() && uploadSessionQuota != null && uploadSessionQuota.getRemainingQuotaInBytes() == 0;
}
public String init() {
@@ -592,8 +594,10 @@ public String init() {
clone = workingVersion.cloneDatasetVersion();
this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId());
if (systemConfig.isStorageQuotasEnforced()) {
- this.userStorageQuota = datafileService.getUserStorageQuota((AuthenticatedUser) session.getUser(), dataset);
- this.maxTotalUploadSizeInBytes = userStorageQuota.getRemainingQuotaInBytes();
+ this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset);
+ if (this.uploadSessionQuota != null) {
+ this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes();
+ }
}
this.maxIngestSizeInBytes = systemConfig.getTabularIngestSizeLimit();
this.humanPerFormatTabularLimits = populateHumanPerFormatTabularLimits();
@@ -1098,7 +1102,7 @@ public String save() {
}
// Try to save the NEW files permanently:
- List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true);
+ List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true);
// reset the working list of fileMetadatas, as to only include the ones
// that have been added to the version successfully:
@@ -1529,7 +1533,7 @@ public void handleDropBoxUpload(ActionEvent event) {
// zip file.
//datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream");
//CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig);
- Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, dropBoxStream, fileName, "application/octet-stream", null, userStorageQuota, null);
+ Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, dropBoxStream, fileName, "application/octet-stream", null, uploadSessionQuota, null);
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
datafiles = createDataFilesResult.getDataFiles();
Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage));
@@ -2068,9 +2072,9 @@ public void handleFileUpload(FileUploadEvent event) throws IOException {
// dataset that does not yet exist in the database. We must
// use the version of the Create New Files constructor that takes
// the parent Dataverse as the extra argument:
- cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, userStorageQuota, null, null, null, workingVersion.getDataset().getOwner());
+ cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null, null, null, workingVersion.getDataset().getOwner());
} else {
- cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, userStorageQuota, null);
+ cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null);
}
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
@@ -2169,7 +2173,7 @@ public void handleExternalUpload() {
- Max size specified in db: check too make sure file is within limits
// ---------------------------- */
/**
- * @todo: this size check is probably redundant here, since the new
+ * @todo: this file size limit check is now redundant here, since the new
* CreateNewFilesCommand is going to perform it (and the quota
* checks too, if enabled
*/
@@ -2208,7 +2212,7 @@ public void handleExternalUpload() {
try {
- Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, null, fileName, contentType, fullStorageIdentifier, userStorageQuota, checksumValue, checksumType, fileSize, parent);
+ Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, null, fileName, contentType, fullStorageIdentifier, uploadSessionQuota, checksumValue, checksumType, fileSize, parent);
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
datafiles = createDataFilesResult.getDataFiles();
Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage));
diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
index bad8903c091..5a689c06019 100644
--- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
+++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java
@@ -31,6 +31,7 @@
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.ConstraintViolationUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -184,6 +185,9 @@ public class EjbDataverseEngine {
@EJB
ConfirmEmailServiceBean confirmEmailService;
+ @EJB
+ StorageUseServiceBean storageUseService;
+
@EJB
EjbDataverseEngineInner innerEngine;
@@ -528,6 +532,12 @@ public DataverseLinkingServiceBean dvLinking() {
public DatasetLinkingServiceBean dsLinking() {
return dsLinking;
}
+
+ @Override
+ public StorageUseServiceBean storageUse() {
+ return storageUseService;
+ }
+
@Override
public DataverseEngine engine() {
return new DataverseEngine() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
index a6ae7223d9d..4d8100124ec 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java
@@ -9,6 +9,7 @@
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.externaltools.ExternalTool;
+import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.JsfHelper;
@@ -53,6 +54,9 @@ public class FileDownloadHelper implements java.io.Serializable {
@EJB
DataFileServiceBean datafileService;
+
+ @EJB
+ GlobusServiceBean globusService;
private final Map<Long, Boolean> fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean }
@@ -60,32 +64,32 @@ public FileDownloadHelper() {
this.filesForRequestAccess = new ArrayList<>();
}
- // See also @Size(max = 255) in GuestbookResponse
- private boolean testResponseLength(String value) {
- return !(value != null && value.length() > 255);
- }
-
// This helper method is called from the Download terms/guestbook/etc. popup,
// when the user clicks the "ok" button. We use it, instead of calling
// downloadServiceBean directly, in order to differentiate between single
// file downloads and multiple (batch) downloads - since both use the same
// terms/etc. popup.
- public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) {
+ public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) {
PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()");
guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD);
// Note that this method is only ever called from the file-download-popup -
// meaning we know for the fact that we DO want to save this
// guestbookResponse permanently in the database.
- if (guestbookResponse.getSelectedFileIds() != null) {
- // this is a batch (multiple file) download.
- // Although here's a chance that this is not really a batch download - i.e.,
- // there may only be one file on the file list. But the fileDownloadService
- // method below will check for that, and will redirect to the single download, if
- // that's the case. -- L.A.
- fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse);
- } else if (guestbookResponse.getDataFile() != null) {
- // this a single file download:
- fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse);
+ if(isGlobusTransfer) {
+ globusService.writeGuestbookAndStartTransfer(guestbookResponse, true);
+ } else {
+ if (guestbookResponse.getSelectedFileIds() != null) {
+ // this is a batch (multiple file) download.
+ // Although here's a chance that this is not really a batch download - i.e.,
+ // there may only be one file on the file list. But the fileDownloadService
+ // method below will check for that, and will redirect to the single download,
+ // if
+ // that's the case. -- L.A.
+ fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse);
+ } else if (guestbookResponse.getDataFile() != null) {
+ // this a single file download:
+ fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse);
+ }
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
index 55817d4a746..de3f4d2ab56 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java
@@ -4,7 +4,6 @@
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
-import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
@@ -16,13 +15,13 @@
import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry;
-import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
-import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.JsfHelper;
import edu.harvard.iq.dataverse.util.StringUtil;
+import edu.harvard.iq.dataverse.util.URLTokenUtil;
+
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
@@ -75,8 +74,6 @@ public class FileDownloadServiceBean implements java.io.Serializable {
@EJB
AuthenticationServiceBean authService;
@EJB
- PrivateUrlServiceBean privateUrlService;
- @EJB
SettingsServiceBean settingsService;
@EJB
MailServiceBean mailService;
@@ -315,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri
}
}
- private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) {
- String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId);
- logger.fine("Redirecting to file download url: " + fileDownloadUrl);
- try {
- FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl);
- } catch (IOException ex) {
- logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex);
+ private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten,
+ Long fileMetadataId) {
+ String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten,
+ fileMetadataId);
+ if ("GlobusTransfer".equals(downloadType)) {
+ PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl));
+ } else {
+ logger.fine("Redirecting to file download url: " + fileDownloadUrl);
+ try {
+ FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl);
+ } catch (IOException ex) {
+ logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex);
+ }
}
}
@@ -352,7 +355,7 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter
User user = session.getUser();
DatasetVersion version = fmd.getDatasetVersion();
if (version.isDraft() || fmd.getDatasetVersion().isDeaccessioned() || (fmd.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fmd))) {
- apiToken = getApiToken(user);
+ apiToken = authService.getValidApiTokenForUser(user);
}
DataFile dataFile = null;
if (fmd != null) {
@@ -379,24 +382,6 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter
}
}
- public ApiToken getApiToken(User user) {
- ApiToken apiToken = null;
- if (user instanceof AuthenticatedUser) {
- AuthenticatedUser authenticatedUser = (AuthenticatedUser) user;
- apiToken = authService.findApiTokenByUser(authenticatedUser);
- if (apiToken == null || apiToken.isExpired()) {
- //No un-expired token
- apiToken = authService.generateApiTokenForUser(authenticatedUser);
- }
- } else if (user instanceof PrivateUrlUser) {
- PrivateUrlUser privateUrlUser = (PrivateUrlUser) user;
- PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId());
- apiToken = new ApiToken();
- apiToken.setTokenString(privateUrl.getToken());
- }
- return apiToken;
- }
-
public void downloadDatasetCitationXML(Dataset dataset) {
downloadCitationXML(null, dataset, false);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
index bfae80ade27..479c8a429c6 100644
--- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java
@@ -1069,7 +1069,7 @@ public String preview(ExternalTool externalTool) {
ApiToken apiToken = null;
User user = session.getUser();
if (fileMetadata.getDatasetVersion().isDraft() || fileMetadata.getDatasetVersion().isDeaccessioned() || (fileMetadata.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fileMetadata))) {
- apiToken=fileDownloadService.getApiToken(user);
+ apiToken=authService.getValidApiTokenForUser(user);
}
if(externalTool == null){
return "";
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
index 9fb584a9133..f54b1fb6117 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java
@@ -288,19 +288,21 @@ public String save() {
Command cmd;
try {
+ // Per recent #dv-tech conversation w/ Jim - copying the code
+ // below from his QDR branch; the code that used to be here called
+ // UpdateDataverseCommand when saving new guestbooks, and that involved
+ // an unnecessary reindexing of the dataverse (and, in some cases,
+ // reindexing of the underlying datasets). - L.A.
if (editMode == EditMode.CREATE || editMode == EditMode.CLONE ) {
guestbook.setCreateTime(new Timestamp(new Date().getTime()));
- guestbook.setUsageCount(new Long(0));
+ guestbook.setUsageCount(Long.valueOf(0));
guestbook.setEnabled(true);
dataverse.getGuestbooks().add(guestbook);
- cmd = new UpdateDataverseCommand(dataverse, null, null, dvRequestService.getDataverseRequest(), null);
- commandEngine.submit(cmd);
create = true;
- } else {
- cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest());
- commandEngine.submit(cmd);
- }
-
+ }
+ cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest());
+ commandEngine.submit(cmd);
+
} catch (EJBException ex) {
StringBuilder error = new StringBuilder();
error.append(ex).append(" ");
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java
index 976f1e084ac..9041ccf887c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java
@@ -99,7 +99,7 @@ public class GuestbookResponse implements Serializable {
*/
public static final String ACCESS_REQUEST = "AccessRequest";
- static final String DOWNLOAD = "Download";
+ public static final String DOWNLOAD = "Download";
static final String SUBSET = "Subset";
static final String EXPLORE = "Explore";
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
index b0cc41eb448..6c043b78941 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java
@@ -432,7 +432,7 @@ public Long findCountByGuestbookId(Long guestbookId, Long dataverseId) {
Query query = em.createNativeQuery(queryString);
return (Long) query.getSingleResult();
} else {
- String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + "and o.guestbook_id = " + guestbookId;
+ String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + " and o.guestbook_id = " + guestbookId;
Query query = em.createNativeQuery(queryString);
return (Long) query.getSingleResult();
}
@@ -914,7 +914,7 @@ public void save(GuestbookResponse guestbookResponse) {
public Long getDownloadCountByDataFileId(Long dataFileId) {
// datafile id is null, will return 0
- Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'");
+ Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'");
return (Long) query.getSingleResult();
}
@@ -928,7 +928,7 @@ public Long getDownloadCountByDatasetId(Long datasetId, LocalDate date) {
if(date != null) {
query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "' and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'");
}else {
- query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'");
+ query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'");
}
return (Long) query.getSingleResult();
}
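The three query fixes above all add a missing leading space in string-concatenated native SQL (`... + dataverseId + "and ..."`). For reference, a minimal sketch of the same count query using positional parameters, which avoids this class of bug entirely; this is an illustration only, not a change made in this diff (em and dataFileId come from the surrounding bean):

```java
// Same count query expressed with positional parameters instead of concatenation;
// the JPA provider binds the values, so a missing " " can no longer corrupt the SQL.
Query query = em.createNativeQuery(
        "select count(o.id) from GuestbookResponse o "
        + "where o.datafile_id = ?1 and eventtype != ?2")
    .setParameter(1, dataFileId)
    .setParameter(2, GuestbookResponse.ACCESS_REQUEST);
Long count = ((Number) query.getSingleResult()).longValue();
```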
diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java
index 6dbba34920b..0b66b652e0c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java
@@ -30,6 +30,8 @@
import jakarta.faces.view.ViewScoped;
import jakarta.inject.Inject;
import jakarta.inject.Named;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.commons.lang3.StringUtils;
/**
@@ -430,44 +432,92 @@ public boolean isSessionUserAuthenticated() {
return false;
}
+ // The numbers of datasets and deleted/exported records below are used
+ // in rendering rules on the page. They absolutely need to be cached
+ // on the first lookup.
+
+ Map<String, Integer> cachedSetInfoNumDatasets = new HashMap<>();
+
public int getSetInfoNumOfDatasets(OAISet oaiSet) {
if (oaiSet.isDefaultSet()) {
return getSetInfoNumOfExported(oaiSet);
}
+ if (cachedSetInfoNumDatasets.get(oaiSet.getSpec()) != null) {
+ return cachedSetInfoNumDatasets.get(oaiSet.getSpec());
+ }
+
String query = oaiSet.getDefinition();
try {
int num = oaiSetService.validateDefinitionQuery(query);
if (num > -1) {
+ cachedSetInfoNumDatasets.put(oaiSet.getSpec(), num);
return num;
}
} catch (OaiSetException ose) {
- // do notghin - will return zero.
+ // do nothing - will return zero.
}
+ cachedSetInfoNumDatasets.put(oaiSet.getSpec(), 0);
return 0;
}
+ Map<String, Integer> cachedSetInfoNumExported = new HashMap<>();
+ Integer defaultSetNumExported = null;
+
public int getSetInfoNumOfExported(OAISet oaiSet) {
+ if (oaiSet.isDefaultSet() && defaultSetNumExported != null) {
+ return defaultSetNumExported;
+ } else if (cachedSetInfoNumExported.get(oaiSet.getSpec()) != null) {
+ return cachedSetInfoNumExported.get(oaiSet.getSpec());
+ }
+
List<OAIRecord> records = oaiRecordService.findActiveOaiRecordsBySetName(oaiSet.getSpec());
+ int num;
+
if (records == null || records.isEmpty()) {
- return 0;
+ num = 0;
+ } else {
+ num = records.size();
}
- return records.size();
+ if (oaiSet.isDefaultSet()) {
+ defaultSetNumExported = num;
+ } else {
+ cachedSetInfoNumExported.put(oaiSet.getSpec(), num);
+ }
+ return num;
}
+ Map<String, Integer> cachedSetInfoNumDeleted = new HashMap<>();
+ Integer defaultSetNumDeleted = null;
+
public int getSetInfoNumOfDeleted(OAISet oaiSet) {
+ if (oaiSet.isDefaultSet() && defaultSetNumDeleted != null) {
+ return defaultSetNumDeleted;
+ } else if (cachedSetInfoNumDeleted.get(oaiSet.getSpec()) != null) {
+ return cachedSetInfoNumDeleted.get(oaiSet.getSpec());
+ }
+
List<OAIRecord> records = oaiRecordService.findDeletedOaiRecordsBySetName(oaiSet.getSpec());
+ int num;
+
if (records == null || records.isEmpty()) {
- return 0;
+ num = 0;
+ } else {
+ num = records.size();
}
- return records.size();
+ if (oaiSet.isDefaultSet()) {
+ defaultSetNumDeleted = num;
+ } else {
+ cachedSetInfoNumDeleted.put(oaiSet.getSpec(), num);
+ }
+ return num;
}
public void validateSetQuery() {
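The caching added above keeps three parallel maps plus nullable Integer fields for the default set. For reference, a minimal sketch of the same cache-on-first-lookup behavior using Map.computeIfAbsent; shown only as a possible simplification, not what this diff implements:

```java
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

public class SetInfoCacheSketch {
    private final Map<String, Integer> cache = new HashMap<>();

    // Computes the value once per key and reuses it on later lookups,
    // mirroring the "cache on the first lookup" requirement stated in the comment above.
    public int getCached(String setSpec, Function<String, Integer> expensiveLookup) {
        return cache.computeIfAbsent(setSpec, expensiveLookup);
    }

    public static void main(String[] args) {
        SetInfoCacheSketch sketch = new SetInfoCacheSketch();
        Function<String, Integer> lookup = spec -> {
            System.out.println("computing for " + spec);
            return spec.length(); // stand-in for a real query
        };
        System.out.println(sketch.getCached("mySet", lookup)); // computes
        System.out.println(sketch.getCached("mySet", lookup)); // served from cache
    }
}
```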
diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java
index a1de33a764e..8fb762e3e5b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java
@@ -41,6 +41,9 @@
import java.util.stream.Collectors;
import static java.util.stream.Collectors.toList;
import jakarta.persistence.Query;
+import jakarta.persistence.criteria.CriteriaBuilder;
+import jakarta.persistence.criteria.CriteriaQuery;
+import jakarta.persistence.criteria.Root;
/**
* Your one-stop-shop for deciding which user can do what action on which
@@ -448,8 +451,9 @@ private boolean isPublicallyDownloadable(DvObject dvo) {
if (!df.isRestricted()) {
if (df.getOwner().getReleasedVersion() != null) {
- if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) {
- for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) {
+ List<FileMetadata> fileMetadatas = df.getOwner().getReleasedVersion().getFileMetadatas();
+ if (fileMetadatas != null) {
+ for (FileMetadata fm : fileMetadatas) {
if (df.equals(fm.getDataFile())) {
return true;
}
@@ -837,4 +841,57 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio
return false;
}
+ /**
+ * Checks if a DataverseRequest can download at least one file of the target DatasetVersion.
+ *
+ * @param dataverseRequest DataverseRequest to check
+ * @param datasetVersion DatasetVersion to check
+ * @return boolean indicating whether the user can download at least one file or not
+ */
+ public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) {
+ if (hasUnrestrictedReleasedFiles(datasetVersion)) {
+ return true;
+ }
+ List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas();
+ for (FileMetadata fileMetadata : fileMetadatas) {
+ DataFile dataFile = fileMetadata.getDataFile();
+ Set<RoleAssignee> roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile));
+ roleAssignees.add(dataverseRequest.getUser());
+ if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Checks if a DatasetVersion has unrestricted released files.
+ *
+ * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing
+ * the search on a particular file, it searches for the total number of files in the target version that are present
+ * in the released version.
+ *
+ * @param targetDatasetVersion DatasetVersion to check
+ * @return boolean indicating whether the dataset version has released files or not
+ */
+ private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) {
+ Dataset targetDataset = targetDatasetVersion.getDataset();
+ if (!targetDataset.isReleased()) {
+ return false;
+ }
+ CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder();
+ CriteriaQuery<Long> criteriaQuery = criteriaBuilder.createQuery(Long.class);
+ Root<DatasetVersion> datasetVersionRoot = criteriaQuery.from(DatasetVersion.class);
+ Root<FileMetadata> fileMetadataRoot = criteriaQuery.from(FileMetadata.class);
+ criteriaQuery
+ .select(criteriaBuilder.count(fileMetadataRoot))
+ .where(criteriaBuilder.and(
+ criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false),
+ criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset),
+ criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED),
+ fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()),
+ fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas"))));
+ Long result = em.createQuery(criteriaQuery).getSingleResult();
+ return result > 0;
+ }
}
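A brief sketch of how a caller might use the new canDownloadAtLeastOneFile() helper, for example to decide whether to render a download control. The bean names permissionService and dvRequestService are assumptions for illustration and are not part of this diff:

```java
// Only offer a bulk download action when the requesting user can actually access at least one file.
DataverseRequest req = dvRequestService.getDataverseRequest();
boolean showDownloadButton = permissionService.canDownloadAtLeastOneFile(req, datasetVersion);
```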
diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java
index 059d5a8ffd3..88acc1916cf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java
@@ -11,6 +11,7 @@
import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.GuestUser;
+import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.mydata.MyDataFilterParams;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil;
@@ -96,18 +97,18 @@ public RoleAssignee getRoleAssignee(String identifier, Boolean augmented) {
if (identifier == null || identifier.isEmpty()) {
throw new IllegalArgumentException("Identifier cannot be null or empty string.");
}
- switch (identifier.charAt(0)) {
- case ':':
+ switch (identifier.substring(0,1)) {
+ case ":":
return predefinedRoleAssignees.get(identifier);
- case '@':
+ case AuthenticatedUser.IDENTIFIER_PREFIX:
if (!augmented){
return authSvc.getAuthenticatedUser(identifier.substring(1));
} else {
return authSvc.getAuthenticatedUserWithProvider(identifier.substring(1));
- }
- case '&':
+ }
+ case Group.IDENTIFIER_PREFIX:
return groupSvc.getGroup(identifier.substring(1));
- case '#':
+ case PrivateUrlUser.PREFIX:
return PrivateUrlUtil.identifier2roleAssignee(identifier);
default:
throw new IllegalArgumentException("Unsupported assignee identifier '" + identifier + "'");
diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
index 6be768321c4..68912969003 100644
--- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
+++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
@@ -6,6 +6,7 @@
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.JsfHelper;
import edu.harvard.iq.dataverse.util.MailUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.util.Optional;
@@ -217,6 +218,7 @@ public String sendMessage() {
}
logger.fine("sending feedback: " + feedback);
mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody());
+ JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent"));
return null;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
index 0a1d0effc03..8ab1e87aef2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
@@ -6,6 +6,9 @@
package edu.harvard.iq.dataverse;
import edu.harvard.iq.dataverse.branding.BrandingUtil;
+import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO;
+import edu.harvard.iq.dataverse.dataaccess.DataAccess;
+import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.Setting;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
@@ -333,12 +336,29 @@ public boolean isGlobusFileDownload() {
}
public boolean isGlobusEnabledStorageDriver(String driverId) {
- if (globusStoreList == null) {
- globusStoreList = systemConfig.getGlobusStoresList();
+ return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId));
+ }
+
+ public boolean isDownloadable(FileMetadata fmd) {
+ boolean downloadable=true;
+ if(isGlobusFileDownload()) {
+ String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier());
+
+ downloadable = downloadable && !AbstractRemoteOverlayAccessIO.isNotDataverseAccessible(driverId);
+ }
+ return downloadable;
+ }
+
+ public boolean isGlobusTransferable(FileMetadata fmd) {
+ boolean globusTransferable=true;
+ if(isGlobusFileDownload()) {
+ String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier());
+ globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId);
}
- return globusStoreList.contains(driverId);
+ return globusTransferable;
}
+
public String getGlobusAppUrl() {
if (globusAppUrl == null) {
globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost");
diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java
index 319ae06eefb..b6ab23848e2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java
@@ -6,24 +6,21 @@
package edu.harvard.iq.dataverse;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
-import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
-
-import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail;
+import edu.harvard.iq.dataverse.dataaccess.StorageIO;
+import edu.harvard.iq.dataverse.dataset.DatasetUtil;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
-import edu.harvard.iq.dataverse.util.FileUtil;
+import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.IOException;
-import java.io.InputStream;
-import java.util.Base64;
import java.util.HashMap;
import java.util.Map;
+import java.util.logging.Logger;
+
import jakarta.ejb.EJB;
import jakarta.enterprise.context.RequestScoped;
-import jakarta.faces.view.ViewScoped;
import jakarta.inject.Inject;
import jakarta.inject.Named;
-import org.apache.commons.io.IOUtils;
/**
*
@@ -33,6 +30,9 @@
@RequestScoped
@Named
public class ThumbnailServiceWrapper implements java.io.Serializable {
+
+ private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName());
+
@Inject
PermissionsWrapper permissionsWrapper;
@EJB
@@ -46,49 +46,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable {
private Map<Long, String> dvobjectThumbnailsMap = new HashMap<>();
private Map<Long, DvObject> dvobjectViewMap = new HashMap<>();
-
- private String getAssignedDatasetImage(Dataset dataset, int size) {
- if (dataset == null) {
- return null;
- }
-
- DataFile assignedThumbnailFile = dataset.getThumbnailFile();
-
- if (assignedThumbnailFile != null) {
- Long assignedThumbnailFileId = assignedThumbnailFile.getId();
-
- if (this.dvobjectThumbnailsMap.containsKey(assignedThumbnailFileId)) {
- // Yes, return previous answer
- //logger.info("using cached result for ... "+assignedThumbnailFileId);
- if (!"".equals(this.dvobjectThumbnailsMap.get(assignedThumbnailFileId))) {
- return this.dvobjectThumbnailsMap.get(assignedThumbnailFileId);
- }
- return null;
- }
-
- String imageSourceBase64 = ImageThumbConverter.getImageThumbnailAsBase64(assignedThumbnailFile,
- size);
- //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
-
- if (imageSourceBase64 != null) {
- this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, imageSourceBase64);
- return imageSourceBase64;
- }
-
- // OK - we can't use this "assigned" image, because of permissions, or because
- // the thumbnail failed to generate, etc... in this case we'll
- // mark this dataset in the lookup map - so that we don't have to
- // do all these lookups again...
- this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, "");
-
- // TODO: (?)
- // do we need to cache this datafile object in the view map?
- // -- L.A., 4.2.2
- }
-
- return null;
-
- }
+ private Map<Long, Boolean> hasThumbMap = new HashMap<>();
// it's the responsibility of the user - to make sure the search result
// passed to this method is of the Datafile type!
@@ -130,7 +88,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) {
if ((!((DataFile)result.getEntity()).isRestricted()
|| permissionsWrapper.hasDownloadFilePermission(result.getEntity()))
- && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) {
+ && isThumbnailAvailable((DataFile) result.getEntity())) {
cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64(
(DataFile) result.getEntity(),
@@ -156,9 +114,16 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) {
return null;
}
+ public boolean isThumbnailAvailable(DataFile entity) {
+ if(!hasThumbMap.containsKey(entity.getId())) {
+ hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity));
+ }
+ return hasThumbMap.get(entity.getId());
+ }
+
// it's the responsibility of the user - to make sure the search result
// passed to this method is of the Dataset type!
- public String getDatasetCardImageAsBase64Url(SolrSearchResult result) {
+ public String getDatasetCardImageAsUrl(SolrSearchResult result) {
// Before we do anything else, check if it's a harvested dataset;
// no need to check anything else if so (harvested datasets never have
// thumbnails)
@@ -176,13 +141,14 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) {
return null;
}
Dataset dataset = (Dataset)result.getEntity();
+ dataset.setId(result.getEntityId());
Long versionId = result.getDatasetVersionId();
- return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
+ return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
}
- public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) {
+ public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) {
Long datasetId = dataset.getId();
if (datasetId != null) {
if (this.dvobjectThumbnailsMap.containsKey(datasetId)) {
@@ -203,112 +169,34 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo
this.dvobjectThumbnailsMap.put(datasetId, "");
return null;
}
-
- String cardImageUrl = null;
- StorageIO<Dataset> dataAccess = null;
-
- try{
- dataAccess = DataAccess.getStorageIO(dataset);
- }
- catch(IOException ioex){
- // ignore
- }
-
- InputStream in = null;
- // See if the dataset already has a dedicated thumbnail ("logo") saved as
- // an auxilary file on the dataset level:
- // (don't bother checking if it exists; just try to open the input stream)
- try {
- in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size);
- //thumb48addedByImageThumbConverter);
- } catch (Exception ioex) {
- //ignore
- }
-
- if (in != null) {
- try {
- byte[] bytes = IOUtils.toByteArray(in);
- String base64image = Base64.getEncoder().encodeToString(bytes);
- cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image;
- this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl);
- return cardImageUrl;
- } catch (IOException ex) {
- this.dvobjectThumbnailsMap.put(datasetId, "");
- return null;
- // (alternatively, we could ignore the exception, and proceed with the
- // regular process of selecting the thumbnail from the available
- // image files - ?)
- } finally
- {
- IOUtils.closeQuietly(in);
- }
- }
-
- // If not, see if the dataset has one of its image files already assigned
- // to be the designated thumbnail:
- cardImageUrl = this.getAssignedDatasetImage(dataset, size);
-
- if (cardImageUrl != null) {
- //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl);
- return cardImageUrl;
- }
-
- // And finally, try to auto-select the thumbnail (unless instructed not to):
-
- if (!autoselect) {
- return null;
- }
-
- // We attempt to auto-select via the optimized, native query-based method
- // from the DatasetVersionService:
- Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId);
-
- if (thumbnailImageFileId != null) {
- //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId;
- if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) {
- // Yes, return previous answer
- //logger.info("using cached result for ... "+datasetId);
- if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) {
- return this.dvobjectThumbnailsMap.get(thumbnailImageFileId);
- }
- return null;
- }
+ DataFile thumbnailFile = dataset.getThumbnailFile();
- DataFile thumbnailImageFile = null;
+ if (thumbnailFile == null) {
- if (dvobjectViewMap.containsKey(thumbnailImageFileId)
- && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) {
- thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId);
- } else {
- thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId);
- if (thumbnailImageFile != null) {
- // TODO:
- // do we need this file on the map? - it may not even produce
- // a thumbnail!
- dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile);
- } else {
- this.dvobjectThumbnailsMap.put(thumbnailImageFileId, "");
- return null;
+ boolean hasDatasetLogo = false;
+ StorageIO<Dataset> storageIO = null;
+ try {
+ storageIO = DataAccess.getStorageIO(dataset);
+ if (storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) {
+ // If not, return null/use the default, otherwise pass the logo URL
+ hasDatasetLogo = true;
}
+ } catch (IOException ioex) {
+ logger.warning("getDatasetCardImageAsUrl(): Failed to initialize dataset StorageIO for "
+ + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")");
}
-
- if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) {
- cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64(
- thumbnailImageFile,
- size);
- //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE);
- }
-
- if (cardImageUrl != null) {
- this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl);
- } else {
- this.dvobjectThumbnailsMap.put(thumbnailImageFileId, "");
+ // If no other logo we attempt to auto-select via the optimized, native
+ // query-based method
+ // from the DatasetVersionService:
+ if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) {
+ return null;
}
}
-
- //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl);
-
- return cardImageUrl;
+ String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo";
+ logger.fine("getDatasetCardImageAsUrl: " + url);
+ this.dvobjectThumbnailsMap.put(datasetId,url);
+ return url;
+
}
// it's the responsibility of the user - to make sure the search result
@@ -320,6 +208,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) {
public void resetObjectMaps() {
dvobjectThumbnailsMap = new HashMap<>();
dvobjectViewMap = new HashMap<>();
+ hasThumbMap = new HashMap<>();
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
index 0a0861fa1c9..bc94d7f0bcc 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
@@ -1,29 +1,6 @@
package edu.harvard.iq.dataverse.api;
-import edu.harvard.iq.dataverse.DataFile;
-import edu.harvard.iq.dataverse.DataFileServiceBean;
-import edu.harvard.iq.dataverse.Dataset;
-import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
-import edu.harvard.iq.dataverse.DatasetFieldType;
-import edu.harvard.iq.dataverse.DatasetLinkingDataverse;
-import edu.harvard.iq.dataverse.DatasetLinkingServiceBean;
-import edu.harvard.iq.dataverse.DatasetServiceBean;
-import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
-import edu.harvard.iq.dataverse.Dataverse;
-import edu.harvard.iq.dataverse.DataverseLinkingDataverse;
-import edu.harvard.iq.dataverse.DataverseLinkingServiceBean;
-import edu.harvard.iq.dataverse.DataverseRoleServiceBean;
-import edu.harvard.iq.dataverse.DataverseServiceBean;
-import edu.harvard.iq.dataverse.DvObject;
-import edu.harvard.iq.dataverse.DvObjectServiceBean;
-import edu.harvard.iq.dataverse.EjbDataverseEngine;
-import edu.harvard.iq.dataverse.GuestbookResponseServiceBean;
-import edu.harvard.iq.dataverse.MetadataBlock;
-import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
-import edu.harvard.iq.dataverse.PermissionServiceBean;
-import edu.harvard.iq.dataverse.RoleAssigneeServiceBean;
-import edu.harvard.iq.dataverse.UserNotificationServiceBean;
-import edu.harvard.iq.dataverse.UserServiceBean;
+import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
@@ -40,42 +17,41 @@
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
-import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean;
+import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.util.json.JsonParser;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
-import java.io.StringReader;
-import java.net.URI;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.UUID;
-import java.util.concurrent.Callable;
-import java.util.logging.Level;
-import java.util.logging.Logger;
import jakarta.ejb.EJB;
import jakarta.ejb.EJBException;
-import jakarta.json.Json;
-import jakarta.json.JsonArray;
-import jakarta.json.JsonArrayBuilder;
-import jakarta.json.JsonObject;
-import jakarta.json.JsonObjectBuilder;
-import jakarta.json.JsonReader;
-import jakarta.json.JsonValue;
+import jakarta.json.*;
import jakarta.json.JsonValue.ValueType;
import jakarta.persistence.EntityManager;
import jakarta.persistence.NoResultException;
import jakarta.persistence.PersistenceContext;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.ws.rs.container.ContainerRequestContext;
-import jakarta.ws.rs.core.*;
+import jakarta.ws.rs.core.Context;
+import jakarta.ws.rs.core.MediaType;
+import jakarta.ws.rs.core.Response;
import jakarta.ws.rs.core.Response.ResponseBuilder;
import jakarta.ws.rs.core.Response.Status;
+import java.io.InputStream;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.UUID;
+import java.util.concurrent.Callable;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
import static org.apache.commons.lang3.StringUtils.isNumeric;
/**
@@ -132,23 +108,21 @@ public Response refineResponse( String message ) {
* In the common case of the wrapped response being of type JSON,
* return the message field it has (if any).
* @return the content of a message field, or {@code null}.
+ * @throws JsonException when JSON parsing fails.
*/
String getWrappedMessageWhenJson() {
if ( response.getMediaType().equals(MediaType.APPLICATION_JSON_TYPE) ) {
Object entity = response.getEntity();
if ( entity == null ) return null;
- String json = entity.toString();
- try ( StringReader rdr = new StringReader(json) ){
- JsonReader jrdr = Json.createReader(rdr);
- JsonObject obj = jrdr.readObject();
- if ( obj.containsKey("message") ) {
- JsonValue message = obj.get("message");
- return message.getValueType() == ValueType.STRING ? obj.getString("message") : message.toString();
- } else {
- return null;
- }
+ JsonObject obj = JsonUtil.getJsonObject(entity.toString());
+ if ( obj.containsKey("message") ) {
+ JsonValue message = obj.get("message");
+ return message.getValueType() == ValueType.STRING ? obj.getString("message") : message.toString();
+ } else {
+ return null;
}
+
} else {
return null;
}
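For reference, a minimal standalone sketch (not part of this patch) of the extraction the refactored helper above performs; the sample error payload is hypothetical.

import jakarta.json.Json;
import jakarta.json.JsonObject;
import jakarta.json.JsonValue;
import java.io.StringReader;

public class WrappedMessageSketch {
    public static void main(String[] args) {
        // A typical wrapped error entity as produced by the API (hypothetical sample payload).
        String entity = "{\"status\":\"ERROR\",\"message\":\"Dataset not found\"}";
        try (var reader = Json.createReader(new StringReader(entity))) {
            JsonObject obj = reader.readObject();
            JsonValue message = obj.get("message");
            // Mirrors getWrappedMessageWhenJson(): string messages are returned unquoted,
            // any other JSON type is returned via toString().
            String extracted = (message != null && message.getValueType() == JsonValue.ValueType.STRING)
                    ? obj.getString("message")
                    : String.valueOf(message);
            System.out.println(extracted); // prints: Dataset not found
        }
    }
}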
@@ -661,7 +635,15 @@ protected Response ok( JsonArrayBuilder bld ) {
.add("data", bld).build())
.type(MediaType.APPLICATION_JSON).build();
}
-
+
+ protected Response ok( JsonArrayBuilder bld , long totalCount) {
+ return Response.ok(Json.createObjectBuilder()
+ .add("status", ApiConstants.STATUS_OK)
+ .add("totalCount", totalCount)
+ .add("data", bld).build())
+ .type(MediaType.APPLICATION_JSON).build();
+ }
+
protected Response ok( JsonArray ja ) {
return Response.ok(Json.createObjectBuilder()
.add("status", ApiConstants.STATUS_OK)
@@ -728,6 +710,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename)
return res.build();
}
+ protected Response ok(InputStream inputStream) {
+ ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT));
+ return res.build();
+ }
+
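For reference, a small standalone sketch (not part of this patch) of the response envelope the new ok(JsonArrayBuilder, long) helper produces; the page contents and count are hypothetical.

import jakarta.json.Json;
import jakarta.json.JsonArrayBuilder;

public class TotalCountEnvelopeSketch {
    public static void main(String[] args) {
        // Hypothetical page of results; in the API this would come from a service bean query.
        JsonArrayBuilder page = Json.createArrayBuilder()
                .add(Json.createObjectBuilder().add("label", "data.tab"))
                .add(Json.createObjectBuilder().add("label", "readme.md"));
        long totalCount = 42; // hypothetical total number of matches across all pages

        // Same envelope that ok(JsonArrayBuilder, long) builds above:
        // {"status":"OK","totalCount":42,"data":[...]}
        String body = Json.createObjectBuilder()
                .add("status", "OK")
                .add("totalCount", totalCount)
                .add("data", page)
                .build()
                .toString();
        System.out.println(body);
    }
}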
protected Response created( String uri, JsonObjectBuilder bld ) {
return Response.created( URI.create(uri) )
.entity( Json.createObjectBuilder()
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java
index 696fcb34920..297ec2d3681 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java
@@ -47,6 +47,7 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.dataaccess.DataFileZipper;
+import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore;
import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService;
import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
@@ -328,8 +329,8 @@ public Response datafile(@Context ContainerRequestContext crc, @PathParam("fileI
dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON"));
dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting"));
}
-
- if(systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) {
+ String driverId = DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier());
+ if(systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) {
dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", df.getContentType(), "format=GlobusTransfer", "Download via Globus"));
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java
index fd3b9a89e54..48f9e19d835 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java
@@ -96,7 +96,6 @@
import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand;
-import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler;
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.userdata.UserListMaker;
@@ -105,8 +104,10 @@
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.URLTokenUtil;
import edu.harvard.iq.dataverse.util.UrlSignerUtil;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
@@ -122,6 +123,7 @@
import jakarta.ws.rs.QueryParam;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.StreamingOutput;
+import java.nio.file.Paths;
/**
* Where the secure, setup API calls live.
@@ -2418,12 +2420,60 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur
}
String baseUrl = urlInfo.getString("url");
- int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10);
- String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET");
+ int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10);
+ String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET");
String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key);
- return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl));
+ return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl));
}
+ @DELETE
+ @Path("/clearThumbnailFailureFlag")
+ public Response clearThumbnailFailureFlag() {
+ em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate();
+ return ok("Thumbnail Failure Flags cleared.");
+ }
+
+ @DELETE
+ @Path("/clearThumbnailFailureFlag/{id}")
+ public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) {
+ try {
+ DataFile df = findDataFileOrDie(fileId);
+ Query updateQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?");
+ updateQuery.setParameter(1, df.getId());
+ updateQuery.executeUpdate();
+ return ok("Thumbnail Failure Flag cleared for file id: " + df.getId() + ".");
+ } catch (WrappedResponse r) {
+ logger.info("Could not find file with the id: " + fileId);
+ return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId);
+ }
+ }
+
+ /**
+ * For testing only. Download a file from /tmp.
+ */
+ @GET
+ @AuthRequired
+ @Path("/downloadTmpFile")
+ public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) {
+ try {
+ AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc);
+ if (!user.isSuperuser()) {
+ return error(Response.Status.FORBIDDEN, "Superusers only.");
+ }
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize();
+ if (!normalizedPath.toString().startsWith("/tmp")) {
+ return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'.");
+ }
+ try {
+ return ok(new FileInputStream(fullyQualifiedPathToFile));
+ } catch (IOException ex) {
+ return error(Status.BAD_REQUEST, ex.toString());
+ }
+ }
+
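A hypothetical usage sketch (not part of this patch) of the two new admin endpoints above, assuming a local instance where the admin API is reachable; the host, superuser token, and /tmp path are placeholders.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class AdminEndpointsSketch {
    public static void main(String[] args) throws Exception {
        String base = "http://localhost:8080/api/admin";                  // hypothetical local instance
        String superuserKey = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";     // hypothetical superuser API token
        HttpClient http = HttpClient.newHttpClient();

        // Clear the thumbnail-failure flag for every DvObject.
        HttpRequest clearAll = HttpRequest.newBuilder(URI.create(base + "/clearThumbnailFailureFlag"))
                .DELETE().build();
        System.out.println(http.send(clearAll, HttpResponse.BodyHandlers.ofString()).body());

        // Download a file from /tmp (testing only; superusers only).
        HttpRequest download = HttpRequest.newBuilder(
                URI.create(base + "/downloadTmpFile?fullyQualifiedPathToFile=/tmp/example.txt")) // hypothetical path
                .header("X-Dataverse-key", superuserKey)
                .GET().build();
        System.out.println(http.send(download, HttpResponse.BodyHandlers.ofString()).statusCode());
    }
}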
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 292aba0cee3..e3505cbbb33 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -1,9 +1,11 @@
package edu.harvard.iq.dataverse.api;
+import com.amazonaws.services.s3.model.PartETag;
import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.DatasetLock.Reason;
import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord;
import edu.harvard.iq.dataverse.api.auth.AuthRequired;
+import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
import edu.harvard.iq.dataverse.authorization.Permission;
@@ -13,6 +15,7 @@
import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode;
+import edu.harvard.iq.dataverse.dataaccess.*;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil;
import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse;
import edu.harvard.iq.dataverse.dataset.DatasetThumbnail;
@@ -23,91 +26,46 @@
import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
-import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments;
-import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult;
-import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException;
+import edu.harvard.iq.dataverse.engine.command.impl.*;
import edu.harvard.iq.dataverse.export.DDIExportServiceBean;
import edu.harvard.iq.dataverse.export.ExportService;
import edu.harvard.iq.dataverse.externaltools.ExternalTool;
import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler;
+import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
+import edu.harvard.iq.dataverse.globus.GlobusUtil;
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
-import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
-import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO;
-import edu.harvard.iq.dataverse.dataaccess.DataAccess;
-import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter;
-import edu.harvard.iq.dataverse.dataaccess.S3AccessIO;
-import edu.harvard.iq.dataverse.dataaccess.StorageIO;
-import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
-import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException;
-import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand;
-import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand;
-import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations;
-import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean;
-import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics;
-import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean;
-import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean;
+import edu.harvard.iq.dataverse.makedatacount.*;
import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry;
import edu.harvard.iq.dataverse.metrics.MetricsUtil;
-import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
+import edu.harvard.iq.dataverse.search.IndexServiceBean;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
-import edu.harvard.iq.dataverse.util.ArchiverUtil;
-import edu.harvard.iq.dataverse.util.BundleUtil;
-import edu.harvard.iq.dataverse.util.EjbUtil;
-import edu.harvard.iq.dataverse.util.FileUtil;
-import edu.harvard.iq.dataverse.util.MarkupChecker;
-import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.*;
import edu.harvard.iq.dataverse.util.bagit.OREMap;
-import edu.harvard.iq.dataverse.util.json.JSONLDUtil;
-import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
-import edu.harvard.iq.dataverse.util.json.JsonParseException;
-import edu.harvard.iq.dataverse.util.json.JsonUtil;
-import edu.harvard.iq.dataverse.util.SignpostingResources;
-import edu.harvard.iq.dataverse.search.IndexServiceBean;
-
-import static edu.harvard.iq.dataverse.api.ApiConstants.*;
-import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;
-import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
-import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
+import edu.harvard.iq.dataverse.util.json.*;
import edu.harvard.iq.dataverse.workflow.Workflow;
import edu.harvard.iq.dataverse.workflow.WorkflowContext;
-import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean;
import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType;
-
-import edu.harvard.iq.dataverse.globus.GlobusServiceBean;
+import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean;
+import jakarta.ejb.EJB;
+import jakarta.ejb.EJBException;
+import jakarta.inject.Inject;
+import jakarta.json.*;
+import jakarta.json.stream.JsonParsingException;
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.servlet.http.HttpServletResponse;
+import jakarta.ws.rs.*;
+import jakarta.ws.rs.container.ContainerRequestContext;
+import jakarta.ws.rs.core.*;
+import jakarta.ws.rs.core.Response.Status;
+import org.apache.commons.lang3.StringUtils;
+import org.glassfish.jersey.media.multipart.FormDataBodyPart;
+import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
+import org.glassfish.jersey.media.multipart.FormDataParam;
import java.io.IOException;
import java.io.InputStream;
@@ -117,48 +75,22 @@
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.LocalDateTime;
-import java.util.*;
-import java.util.concurrent.*;
-import java.util.function.Predicate;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
+import java.util.*;
import java.util.Map.Entry;
+import java.util.concurrent.ExecutionException;
+import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
-import jakarta.ejb.EJB;
-import jakarta.ejb.EJBException;
-import jakarta.inject.Inject;
-import jakarta.json.*;
-import jakarta.json.stream.JsonParsingException;
-import jakarta.servlet.http.HttpServletRequest;
-import jakarta.servlet.http.HttpServletResponse;
-import jakarta.ws.rs.BadRequestException;
-import jakarta.ws.rs.Consumes;
-import jakarta.ws.rs.DELETE;
-import jakarta.ws.rs.DefaultValue;
-import jakarta.ws.rs.GET;
-import jakarta.ws.rs.NotAcceptableException;
-import jakarta.ws.rs.POST;
-import jakarta.ws.rs.PUT;
-import jakarta.ws.rs.Path;
-import jakarta.ws.rs.PathParam;
-import jakarta.ws.rs.Produces;
-import jakarta.ws.rs.QueryParam;
-import jakarta.ws.rs.container.ContainerRequestContext;
-import jakarta.ws.rs.core.*;
-import jakarta.ws.rs.core.Response.Status;
+import static edu.harvard.iq.dataverse.api.ApiConstants.*;
+import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;
+import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
-import org.apache.commons.lang3.StringUtils;
-import org.glassfish.jersey.media.multipart.FormDataBodyPart;
-import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
-import org.glassfish.jersey.media.multipart.FormDataParam;
-import com.amazonaws.services.s3.model.PartETag;
-import edu.harvard.iq.dataverse.settings.JvmSettings;
-
@Path("datasets")
public class Datasets extends AbstractApiBean {
@@ -268,12 +200,10 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id"
}, getRequestUser(crc));
}
- // TODO:
// This API call should, ideally, call findUserOrDie() and the GetDatasetCommand
// to obtain the dataset that we are trying to export - which would handle
// Auth in the process... For now, Auth isn't necessary - since export ONLY
// WORKS on published datasets, which are open to the world. -- L.A. 4.5
-
@GET
@Path("/export")
@Produces({"application/xml", "application/json", "application/html", "application/ld+json" })
@@ -470,14 +400,15 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa
@GET
@AuthRequired
@Path("{id}/versions")
- public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("includeFiles") Boolean includeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) {
+ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("excludeFiles") Boolean excludeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) {
return response( req -> {
Dataset dataset = findDatasetOrDie(id);
+ Boolean deepLookup = excludeFiles == null ? true : !excludeFiles;
- return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, (includeFiles == null ? true : includeFiles)) )
+ return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, deepLookup) )
.stream()
- .map( d -> json(d, includeFiles == null ? true : includeFiles) )
+ .map( d -> json(d, deepLookup) )
.collect(toJsonArray()));
}, getRequestUser(crc));
}
@@ -488,21 +419,27 @@ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("i
public Response getVersion(@Context ContainerRequestContext crc,
@PathParam("id") String datasetId,
@PathParam("versionId") String versionId,
- @QueryParam("includeFiles") Boolean includeFiles,
+ @QueryParam("excludeFiles") Boolean excludeFiles,
@QueryParam("includeDeaccessioned") boolean includeDeaccessioned,
@Context UriInfo uriInfo,
@Context HttpHeaders headers) {
return response( req -> {
- DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned);
+
+
+ // If excludeFiles is null, the default is to include the files, so we need to check permissions.
+ boolean checkPerms = excludeFiles == null ? true : !excludeFiles;
+
+ Dataset dst = findDatasetOrDie(datasetId);
+ DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, dst, uriInfo, headers, includeDeaccessioned, checkPerms);
if (dsv == null || dsv.getId() == null) {
return notFound("Dataset version not found");
}
- if (includeFiles == null ? true : includeFiles) {
+ if (excludeFiles == null ? true : !excludeFiles) {
dsv = datasetversionService.findDeep(dsv.getId());
}
- return ok(json(dsv, includeFiles == null ? true : includeFiles));
+ return ok(json(dsv, excludeFiles == null ? true : !excludeFiles));
}, getRequestUser(crc));
}
@@ -543,7 +480,8 @@ public Response getVersionFiles(@Context ContainerRequestContext crc,
} catch (IllegalArgumentException e) {
return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus)));
}
- return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)));
+ return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)),
+ datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria));
}, getRequestUser(crc));
}
@@ -840,7 +778,7 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @
@Path("{id}/metadata")
@Produces("application/ld+json, application/json-ld")
public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
- return getVersionJsonLDMetadata(crc, id, DS_VERSION_DRAFT, uriInfo, headers);
+ return getVersionJsonLDMetadata(crc, id, DS_VERSION_LATEST, uriInfo, headers);
}
@PUT
@@ -1971,6 +1909,22 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) {
}
}
+ @GET
+ @Produces({ "image/png" })
+ @Path("{id}/logo")
+ public Response getDatasetLogo(@PathParam("id") String idSupplied) {
+ try {
+ Dataset dataset = findDatasetOrDie(idSupplied);
+ InputStream is = DatasetUtil.getLogoAsInputStream(dataset);
+ if (is == null) {
+ return notFound("Logo not available");
+ }
+ return Response.ok(is).build();
+ } catch (WrappedResponse wr) {
+ return notFound("Logo not available");
+ }
+ }
+
// TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers.
@POST
@AuthRequired
@@ -2753,16 +2707,31 @@ public static <T> T handleVersion(String versionId, DsVersionHandler<T> hdl)
}
}
+ /*
+ * includeDeaccessioned defaults to false and checkPermsWhenDeaccessioned to false. Use this only when you are sure you don't need to work with
+ * a deaccessioned dataset.
+ */
private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse {
- return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, false);
+ // The checkPerms flag was added to check permissions ONLY when the dataset is deaccessioned.
+ return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, false, false);
+ }
+
+ /*
+ * checkPermsWhenDeaccessioned defaults to true. Be aware that a deaccessioned version will only be obtainable if the user has edit permissions.
+ */
+ private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned) throws WrappedResponse{
+ return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, includeDeaccessioned, true);
}
- private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned) throws WrappedResponse {
+ /*
+ * Allows specifying whether permissions should be checked when a deaccessioned dataset is requested. If the user doesn't have edit permissions, an error will result.
+ */
+ private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) throws WrappedResponse {
DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler<Command<DatasetVersion>>() {
@Override
public Command<DatasetVersion> handleLatest() {
- return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned);
+ return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned);
}
@Override
@@ -2772,12 +2741,12 @@ public Command<DatasetVersion> handleDraft() {
@Override
public Command<DatasetVersion> handleSpecific(long major, long minor) {
- return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor, includeDeaccessioned);
+ return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor, includeDeaccessioned, checkPermsWhenDeaccessioned);
}
@Override
public Command<DatasetVersion> handleLatestPublished() {
- return new GetLatestPublishedDatasetVersionCommand(req, ds, includeDeaccessioned);
+ return new GetLatestPublishedDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned);
}
}));
if (dsv == null || dsv.getId() == null) {
@@ -3442,15 +3411,246 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam("
}
+/****************************
+ * Globus Support Section:
+ *
+ * Globus transfer in (upload) and out (download) involve three basic steps: The
+ * app is launched and makes a callback to the
+ * globusUploadParameters/globusDownloadParameters method to get all of the info
+ * needed to set up it's display.
+ *
+ * At some point after that, the user will make a selection as to which files to
+ * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload
+ * to indicate a transfer is about to start. In addition to providing the
+ * details of where to transfer the files to/from, Dataverse also grants the
+ * Globus principal involved the relevant rw or r permission for the dataset.
+ *
+ * Once the transfer is started, the app records the task id and sends it to
+ * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then
+ * monitors the transfer task and when it ultimately succeeds or fails it
+ * revokes the principal's permission and, for the transfer-in case, adds the
+ * files to the dataset. (The dataset is locked until the transfer completes.)
+ *
+ * (If no transfer is started within a specified timeout, permissions will
+ * automatically be revoked - see the GlobusServiceBean for details.)
+ *
+ * The option to reference a file at a remote endpoint (rather than transfer it)
+ * follows the first two steps of the process above but completes with a call to
+ * the normal /addFiles endpoint (as there is no transfer to monitor and the
+ * files can be added to the dataset immediately.)
+ */
+
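A hypothetical client-side sketch (not part of this patch) of the upload flow described above, using only the endpoints added below; the base URL, API token, Globus principal, and payload values are placeholders, and the addGlobusFiles step is shown only as the jsonData payload it would carry in its multipart form.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class GlobusUploadFlowSketch {
    public static void main(String[] args) throws Exception {
        String base = "https://demo.example.edu/api/v1/datasets/123"; // hypothetical instance and dataset id
        String token = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";        // hypothetical API token
        HttpClient http = HttpClient.newHttpClient();

        // Step 1: the app fetches its display parameters and the signed URLs it may call.
        HttpRequest params = HttpRequest.newBuilder(URI.create(base + "/globusUploadParameters?locale=en"))
                .header("X-Dataverse-key", token).GET().build();
        System.out.println(http.send(params, HttpResponse.BodyHandlers.ofString()).body());

        // Step 2: request storage identifiers and permissions for the files to be transferred.
        String request = "{\"principal\":\"hypothetical-globus-principal-id\",\"numberOfFiles\":2}";
        HttpRequest paths = HttpRequest.newBuilder(URI.create(base + "/requestGlobusUploadPaths"))
                .header("X-Dataverse-key", token)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(request)).build();
        System.out.println(http.send(paths, HttpResponse.BodyHandlers.ofString()).body());

        // Step 3: after starting the transfer, report the task id so Dataverse can monitor it
        // and add the files when it completes (the real call is a multipart form whose
        // "jsonData" part carries a payload like the one below).
        String jsonData = "{\"taskIdentifier\":\"hypothetical-globus-task-id\",\"files\":[]}";
        System.out.println("POST " + base + "/addGlobusFiles with jsonData=" + jsonData);
    }
}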
+ /**
+ * Retrieve the parameters and signed URLs required to perform a globus
+ * transfer. This api endpoint is expected to be called as a signed callback
+ * after the globus-dataverse app/other app is launched, but it will accept
+ * other forms of authentication.
+ *
+ * @param crc
+ * @param datasetId
+ */
+ @GET
+ @AuthRequired
+ @Path("{id}/globusUploadParameters")
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+ @QueryParam(value = "locale") String locale) {
+ // -------------------------------------
+ // (1) Get the user from the ContainerRequestContext
+ // -------------------------------------
+ AuthenticatedUser authUser;
+ try {
+ authUser = getRequestAuthenticatedUserOrDie(crc);
+ } catch (WrappedResponse e) {
+ return e.getResponse();
+ }
+ // -------------------------------------
+ // (2) Get the Dataset Id
+ // -------------------------------------
+ Dataset dataset;
+
+ try {
+ dataset = findDatasetOrDie(datasetId);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ String storeId = dataset.getEffectiveStorageDriverId();
+ // acceptsGlobusTransfers should only be true for an S3 or globus store
+ if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId)
+ && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) {
+ return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled"));
+ }
+
+ URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale);
+
+ boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId);
+ String transferEndpoint = null;
+ JsonArray referenceEndpointsWithPaths = null;
+ if (managed) {
+ transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId);
+ } else {
+ referenceEndpointsWithPaths = GlobusAccessibleStore.getReferenceEndpointsWithPaths(storeId);
+ }
+
+ JsonObjectBuilder queryParams = Json.createObjectBuilder();
+ queryParams.add("queryParameters",
+ Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}"))
+ .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}"))
+ .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}"))
+ .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}"))
+ .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}")));
+ JsonObject substitutedParams = tokenUtil.getParams(queryParams.build());
+ JsonObjectBuilder params = Json.createObjectBuilder();
+ substitutedParams.keySet().forEach((key) -> {
+ params.add(key, substitutedParams.get(key));
+ });
+ params.add("managed", Boolean.toString(managed));
+ if (transferEndpoint != null) {
+ params.add("endpoint", transferEndpoint);
+ } else {
+ params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths);
+ }
+ int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class);
+ JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder();
+ String requestCallName = managed ? "requestGlobusTransferPaths" : "requestGlobusReferencePaths";
+ allowedApiCalls.add(
+ Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName).add(URLTokenUtil.HTTP_METHOD, "POST")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusUploadPaths")
+ .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+ if(managed) {
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles")
+ .add(URLTokenUtil.HTTP_METHOD, "POST")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles")
+ .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+ } else {
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addFiles")
+ .add(URLTokenUtil.HTTP_METHOD, "POST")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addFiles")
+ .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+ }
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata")
+ .add(URLTokenUtil.HTTP_METHOD, "GET")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}")
+ .add(URLTokenUtil.TIMEOUT, 5));
+ allowedApiCalls.add(
+ Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files")
+ .add(URLTokenUtil.TIMEOUT, 5));
+
+ return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build()));
+ }
+
+ /**
+ * Provides specific storageIdentifiers to use for each file and requests permissions for a given Globus user to upload to the dataset
+ *
+ * @param crc
+ * @param datasetId
+ * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred.
+ * @return
+ * @throws IOException
+ * @throws ExecutionException
+ * @throws InterruptedException
+ */
+ @POST
+ @AuthRequired
+ @Path("{id}/requestGlobusUploadPaths")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+ String jsonBody) throws IOException, ExecutionException, InterruptedException {
+
+ logger.info(" ==== (api allowGlobusUpload) jsonBody ====== " + jsonBody);
+
+ if (!systemConfig.isGlobusUpload()) {
+ return error(Response.Status.SERVICE_UNAVAILABLE,
+ BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
+ }
+
+ // -------------------------------------
+ // (1) Get the user from the ContainerRequestContext
+ // -------------------------------------
+ AuthenticatedUser authUser;
+ try {
+ authUser = getRequestAuthenticatedUserOrDie(crc);
+ } catch (WrappedResponse e) {
+ return e.getResponse();
+ }
+
+ // -------------------------------------
+ // (2) Get the Dataset Id
+ // -------------------------------------
+ Dataset dataset;
+
+ try {
+ dataset = findDatasetOrDie(datasetId);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset)
+ .canIssue(UpdateDatasetVersionCommand.class)) {
+
+ JsonObject params = JsonUtil.getJsonObject(jsonBody);
+ if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) {
+ try {
+ JsonArray referencedFiles = params.getJsonArray("referencedFiles");
+ if (referencedFiles == null || referencedFiles.size() == 0) {
+ return badRequest("No referencedFiles specified");
+ }
+ JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles);
+ return (ok(fileMap));
+ } catch (Exception e) {
+ return badRequest(e.getLocalizedMessage());
+ }
+ } else {
+ try {
+ String principal = params.getString("principal");
+ int numberOfPaths = params.getInt("numberOfFiles");
+ if (numberOfPaths <= 0) {
+ return badRequest("numberOfFiles must be positive");
+ }
+
+ JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths);
+ switch (response.getInt("status")) {
+ case 201:
+ return ok(response.getJsonObject("paths"));
+ case 400:
+ return badRequest("Unable to grant permission");
+ case 409:
+ return conflict("Permission already exists");
+ default:
+ return error(null, "Unexpected error when granting permission");
+ }
+
+ } catch (NullPointerException | ClassCastException e) {
+ return badRequest("Error retrieving principal and numberOfFiles from JSON request body");
+
+ }
+ }
+ } else {
+ return forbidden("User doesn't have permission to upload to this dataset");
+ }
+
+ }
+
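For reference, a standalone sketch (not part of this patch) of the two request-body shapes this endpoint accepts per the branches above: managed stores need "principal" and "numberOfFiles", remote reference stores need a "referencedFiles" array. The values, including the reference string format, are placeholders.

import jakarta.json.Json;

public class GlobusUploadRequestBodies {
    public static void main(String[] args) {
        // Managed store: Dataverse assigns storage identifiers and grants the principal rw access.
        String managedBody = Json.createObjectBuilder()
                .add("principal", "hypothetical-globus-principal-id")
                .add("numberOfFiles", 2)
                .build().toString();

        // Remote (non-managed) store: the files already exist at a remote Globus endpoint.
        String referenceBody = Json.createObjectBuilder()
                .add("referencedFiles", Json.createArrayBuilder()
                        .add("globus://endpoint-id/path/to/file1.dat")   // hypothetical reference format
                        .add("globus://endpoint-id/path/to/file2.dat"))
                .build().toString();

        System.out.println(managedBody);
        System.out.println(referenceBody);
    }
}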
+ /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor
+ *
+ * @param crc
+ * @param datasetId
+ * @param jsonData - see /addFiles documentation; an additional "taskIdentifier" key in the main object is required.
+ * @param uriInfo
+ * @return
+ * @throws IOException
+ * @throws ExecutionException
+ * @throws InterruptedException
+ */
@POST
@AuthRequired
- @Path("{id}/addglobusFiles")
+ @Path("{id}/addGlobusFiles")
@Consumes(MediaType.MULTIPART_FORM_DATA)
public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc,
@PathParam("id") String datasetId,
@FormDataParam("jsonData") String jsonData,
- @Context UriInfo uriInfo,
- @Context HttpHeaders headers
+ @Context UriInfo uriInfo
) throws IOException, ExecutionException, InterruptedException {
logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData);
@@ -3480,6 +3680,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc,
} catch (WrappedResponse wr) {
return wr.getResponse();
}
+
+ JsonObject jsonObject = null;
+ try {
+ jsonObject = JsonUtil.getJsonObject(jsonData);
+ } catch (Exception ex) {
+ logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage());
+ return badRequest("Error parsing json body");
+
+ }
//------------------------------------
// (2b) Make sure dataset does not have package file
@@ -3510,32 +3719,279 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc,
}
- String requestUrl = headers.getRequestHeader("origin").get(0);
+ String requestUrl = SystemConfig.getDataverseSiteUrlStatic();
+
+ // Async Call
+ globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser);
+
+ return ok("Async call to Globus Upload started ");
+
+ }
+
+/**
+ * Retrieve the parameters and signed URLs required to perform a globus
+ * transfer/download. This api endpoint is expected to be called as a signed
+ * callback after the globus-dataverse app/other app is launched, but it will
+ * accept other forms of authentication.
+ *
+ * @param crc
+ * @param datasetId
+ * @param locale
+ * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL.
+ * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools.
+ */
+ @GET
+ @AuthRequired
+ @Path("{id}/globusDownloadParameters")
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+ @QueryParam(value = "locale") String locale, @QueryParam(value = "downloadId") String downloadId) {
+ // -------------------------------------
+ // (1) Get the user from the ContainerRequestContext
+ // -------------------------------------
+ AuthenticatedUser authUser;
+ try {
+ authUser = getRequestAuthenticatedUserOrDie(crc);
+ } catch (WrappedResponse e) {
+ return e.getResponse();
+ }
+ // -------------------------------------
+ // (2) Get the Dataset Id
+ // -------------------------------------
+ Dataset dataset;
+
+ try {
+ dataset = findDatasetOrDie(datasetId);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ String storeId = dataset.getEffectiveStorageDriverId();
+ // acceptsGlobusTransfers should only be true for an S3 or globus store
+ if (!(GlobusAccessibleStore.acceptsGlobusTransfers(storeId)
+ || GlobusAccessibleStore.allowsGlobusReferences(storeId))) {
+ return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
+ }
- if(requestUrl.contains("localhost")){
- requestUrl = "http://localhost:8080";
+ JsonObject files = globusService.getFilesForDownload(downloadId);
+ if (files == null) {
+ return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound"));
}
- // Async Call
- globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser);
+ URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale);
- return ok("Async call to Globus Upload started ");
+ boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId);
+ String transferEndpoint = null;
+ JsonObjectBuilder queryParams = Json.createObjectBuilder();
+ queryParams.add("queryParameters",
+ Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}"))
+ .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}"))
+ .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}"))
+ .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}"))
+ .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}")));
+ JsonObject substitutedParams = tokenUtil.getParams(queryParams.build());
+ JsonObjectBuilder params = Json.createObjectBuilder();
+ substitutedParams.keySet().forEach((key) -> {
+ params.add(key, substitutedParams.get(key));
+ });
+ params.add("managed", Boolean.toString(managed));
+ if (managed) {
+ transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId);
+ params.add("endpoint", transferEndpoint);
+ }
+ params.add("files", files);
+ int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class);
+ JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder();
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload")
+ .add(URLTokenUtil.HTTP_METHOD, "POST")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/monitorGlobusDownload")
+ .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusDownload")
+ .add(URLTokenUtil.HTTP_METHOD, "POST")
+ .add(URLTokenUtil.URL_TEMPLATE,
+ "/api/v1/datasets/{datasetId}/requestGlobusDownload?downloadId=" + downloadId)
+ .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+ allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata")
+ .add(URLTokenUtil.HTTP_METHOD, "GET")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}")
+ .add(URLTokenUtil.TIMEOUT, 5));
+ allowedApiCalls.add(
+ Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET")
+ .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files")
+ .add(URLTokenUtil.TIMEOUT, 5));
+
+ return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build()));
}
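A hypothetical client-side sketch (not part of this patch) of the download flow, combining this endpoint with the requestGlobusDownload and monitorGlobusDownload endpoints defined below; the base URL, token, downloadId, principal, file ids, and task id are placeholders.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class GlobusDownloadFlowSketch {
    public static void main(String[] args) throws Exception {
        String base = "https://demo.example.edu/api/v1/datasets/123"; // hypothetical instance and dataset id
        String token = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";        // hypothetical API token
        HttpClient http = HttpClient.newHttpClient();

        // Step 1: the app fetches its parameters, passing the downloadId it received in the signed URL.
        HttpRequest params = HttpRequest.newBuilder(
                URI.create(base + "/globusDownloadParameters?downloadId=abc123&locale=en")) // hypothetical downloadId
                .header("X-Dataverse-key", token).GET().build();
        System.out.println(http.send(params, HttpResponse.BodyHandlers.ofString()).body());

        // Step 2: request permissions/paths for the chosen files. "fileIds" is only needed
        // when no downloadId query parameter is supplied (see requestGlobusDownload below).
        String request = "{\"principal\":\"hypothetical-globus-principal-id\",\"fileIds\":[7,8]}";
        HttpRequest paths = HttpRequest.newBuilder(URI.create(base + "/requestGlobusDownload"))
                .header("X-Dataverse-key", token)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(request)).build();
        System.out.println(http.send(paths, HttpResponse.BodyHandlers.ofString()).body());

        // Step 3: once the transfer is started, report the task so Dataverse can revoke the
        // temporary permission when it finishes (see monitorGlobusDownload below).
        String task = "{\"taskIdentifier\":\"hypothetical-globus-task-id\"}";
        HttpRequest monitor = HttpRequest.newBuilder(URI.create(base + "/monitorGlobusDownload"))
                .header("X-Dataverse-key", token)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(task)).build();
        System.out.println(http.send(monitor, HttpResponse.BodyHandlers.ofString()).statusCode());
    }
}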
+ /**
+ * Requests permissions for a given Globus user to download the specified files
+ * in the dataset and returns information about the paths to transfer from.
+ *
+ * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer.
+ *
+ * @param crc
+ * @param datasetId
+ * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required.
+ * @return - a JSON object containing a map of file ids to Globus endpoint/path
+ * @throws IOException
+ * @throws ExecutionException
+ * @throws InterruptedException
+ */
@POST
@AuthRequired
- @Path("{id}/deleteglobusRule")
- @Consumes(MediaType.MULTIPART_FORM_DATA)
- public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData
- ) throws IOException, ExecutionException, InterruptedException {
+ @Path("{id}/requestGlobusDownload")
+ @Consumes(MediaType.APPLICATION_JSON)
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response requestGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+ @QueryParam(value = "downloadId") String downloadId, String jsonBody)
+ throws IOException, ExecutionException, InterruptedException {
+ logger.info(" ==== (api allowGlobusDownload) jsonBody ====== " + jsonBody);
- logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData);
+ if (!systemConfig.isGlobusDownload()) {
+ return error(Response.Status.SERVICE_UNAVAILABLE,
+ BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
+ }
+ // -------------------------------------
+ // (1) Get the user from the ContainerRequestContext
+ // -------------------------------------
+ User user = getRequestUser(crc);
- if (!systemConfig.isHTTPUpload()) {
- return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled"));
+ // -------------------------------------
+ // (2) Get the Dataset Id
+ // -------------------------------------
+ Dataset dataset;
+
+ try {
+ dataset = findDatasetOrDie(datasetId);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ JsonObject body = null;
+ if (jsonBody != null) {
+ body = JsonUtil.getJsonObject(jsonBody);
+ }
+ Set<String> fileIds = null;
+ if (downloadId != null) {
+ JsonObject files = globusService.getFilesForDownload(downloadId);
+ if (files != null) {
+ fileIds = files.keySet();
+ }
+ } else {
+ if ((body!=null) && body.containsKey("fileIds")) {
+ Collection<JsonValue> fileVals = body.getJsonArray("fileIds").getValuesAs(JsonValue.class);
+ fileIds = new HashSet<>(fileVals.size());
+ for (JsonValue fileVal : fileVals) {
+ String id = null;
+ switch (fileVal.getValueType()) {
+ case STRING:
+ id = ((JsonString) fileVal).getString();
+ break;
+ case NUMBER:
+ id = ((JsonNumber) fileVal).toString();
+ break;
+ default:
+ return badRequest("fileIds must be numeric or string (ids/PIDs)");
+ }
+ fileIds.add(id);
+ }
+ } else {
+ return badRequest("fileIds JsonArray of file ids/PIDs required in POST body");
+ }
+ }
+
+ if (fileIds.isEmpty()) {
+ return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound"));
+ }
+ ArrayList<DataFile> dataFiles = new ArrayList<>(fileIds.size());
+ for (String id : fileIds) {
+ boolean published = false;
+ logger.info("File id: " + id);
+
+ DataFile df = null;
+ try {
+ df = findDataFileOrDie(id);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ if (!df.getOwner().equals(dataset)) {
+ return badRequest("All files must be in the dataset");
+ }
+ dataFiles.add(df);
+
+ for (FileMetadata fm : df.getFileMetadatas()) {
+ if (fm.getDatasetVersion().isPublished()) {
+ published = true;
+ break;
+ }
+ }
+
+ if (!published) {
+ // If the file is not published, they can still download the file, if the user
+ // has the permission to view unpublished versions:
+
+ if (!permissionService.hasPermissionsFor(user, df.getOwner(),
+ EnumSet.of(Permission.ViewUnpublishedDataset))) {
+ return forbidden("User doesn't have permission to download file: " + id);
+ }
+ } else { // published and restricted and/or embargoed
+ if (df.isRestricted() || FileUtil.isActivelyEmbargoed(df))
+ // This line also handles all three authenticated session user, token user, and
+ // guest cases.
+ if (!permissionService.hasPermissionsFor(user, df, EnumSet.of(Permission.DownloadFile))) {
+ return forbidden("User doesn't have permission to download file: " + id);
+ }
+
+ }
+ }
+ // Allowed to download all requested files
+ JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset);
+ if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) {
+ // If managed, give the principal read permissions
+ int status = globusService.setPermissionForDownload(dataset, body.getString("principal"));
+ switch (status) {
+ case 201:
+ return ok(files);
+ case 400:
+ return badRequest("Unable to grant permission");
+ case 409:
+ return conflict("Permission already exists");
+ default:
+ return error(null, "Unexpected error when granting permission");
+ }
+
+ }
+
+ return ok(files);
+ }
+
+ /**
+ * Monitors a globus download and removes permissions on the dir/dataset when
+ * the specified transfer task is completed.
+ *
+ * @param crc
+ * @param datasetId
+ * @param jsonData - a JSON Object containing the key "taskIdentifier" with the
+ * Globus task to monitor.
+ * @return
+ * @throws IOException
+ * @throws ExecutionException
+ * @throws InterruptedException
+ */
+ @POST
+ @AuthRequired
+ @Path("{id}/monitorGlobusDownload")
+ @Consumes(MediaType.APPLICATION_JSON)
+ public Response monitorGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+ String jsonData) throws IOException, ExecutionException, InterruptedException {
+
+ logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData);
+
+ if (!systemConfig.isGlobusDownload()) {
+ return error(Response.Status.SERVICE_UNAVAILABLE,
+ BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
}
// -------------------------------------
@@ -3562,7 +4018,6 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara
}
-
/**
* Add multiple Files to an existing Dataset
*
@@ -3574,9 +4029,8 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara
@AuthRequired
@Path("{id}/addFiles")
@Consumes(MediaType.MULTIPART_FORM_DATA)
- public Response addFilesToDataset(@Context ContainerRequestContext crc,
- @PathParam("id") String idSupplied,
- @FormDataParam("jsonData") String jsonData) {
+ public Response addFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied,
+ @FormDataParam("jsonData") String jsonData) {
if (!systemConfig.isHTTPUpload()) {
return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled"));
@@ -3784,7 +4238,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext
headers);
if (dsv.getArchivalCopyLocation() == null) {
- return error(Status.NO_CONTENT, "This dataset version has not been archived");
+ return error(Status.NOT_FOUND, "This dataset version has not been archived");
} else {
JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation());
return ok(status);
@@ -3930,13 +4384,10 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc,
}
ApiToken apiToken = null;
User u = getRequestUser(crc);
- if (u instanceof AuthenticatedUser) {
- apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u);
- }
-
+ apiToken = authSvc.getValidApiTokenForUser(u);
- ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale);
- return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters()))));
+ URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale);
+ return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls())));
} catch (WrappedResponse wr) {
return wr.getResponse();
}
@@ -3995,9 +4446,14 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken"
@GET
@AuthRequired
@Path("{id}/versions/{versionId}/citation")
- public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
+ public Response getDatasetVersionCitation(@Context ContainerRequestContext crc,
+ @PathParam("id") String datasetId,
+ @PathParam("versionId") String versionId,
+ @QueryParam("includeDeaccessioned") boolean includeDeaccessioned,
+ @Context UriInfo uriInfo,
+ @Context HttpHeaders headers) {
return response(req -> ok(
- getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc));
+ getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned, false).getCitation(true, false)), getRequestUser(crc));
}
@POST
@@ -4008,7 +4464,7 @@ public Response deaccessionDataset(@Context ContainerRequestContext crc, @PathPa
return badRequest(BundleUtil.getStringFromBundle("datasets.api.deaccessionDataset.invalid.version.identifier.error", List.of(DS_VERSION_LATEST_PUBLISHED)));
}
return response(req -> {
- DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false);
+ DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers);
try {
JsonObject jsonObject = JsonUtil.getJsonObject(jsonBody);
datasetVersion.setVersionNote(jsonObject.getString("deaccessionReason"));
@@ -4132,4 +4588,19 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc
jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft));
return ok(jsonObjectBuilder);
}
+
+ @GET
+ @AuthRequired
+ @Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile")
+ public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc,
+ @PathParam("id") String datasetId,
+ @PathParam("versionId") String versionId,
+ @QueryParam("includeDeaccessioned") boolean includeDeaccessioned,
+ @Context UriInfo uriInfo,
+ @Context HttpHeaders headers) {
+ return response(req -> {
+ DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned);
+ return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion));
+ }, getRequestUser(crc));
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
index d0711aefa5f..6c1bf42c02a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
@@ -41,9 +41,13 @@
import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand;
import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.CreateRoleCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.DeleteCollectionQuotaCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionQuotaCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionStorageUseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand;
import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand;
@@ -63,11 +67,13 @@
import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RemoveRoleAssigneesFromExplicitGroupCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.SetCollectionQuotaCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseDefaultContributorRoleCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
@@ -126,7 +132,6 @@
import java.util.Optional;
import java.util.stream.Collectors;
import jakarta.servlet.http.HttpServletResponse;
-import jakarta.validation.constraints.NotNull;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.Context;
import jakarta.ws.rs.core.StreamingOutput;
@@ -232,6 +237,40 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body,
}
}
+
+ @POST
+ @AuthRequired
+ @Path("{identifier}/validateDatasetJson")
+ @Consumes("application/json")
+ public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) {
+ User u = getRequestUser(crc);
+ try {
+ String validationMessage = execCommand(new ValidateDatasetJsonCommand(createDataverseRequest(u), findDataverseOrDie(idtf), body));
+ return ok(validationMessage);
+ } catch (WrappedResponse ex) {
+ Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex);
+ return ex.getResponse();
+ }
+ }
+
+ @GET
+ @AuthRequired
+ @Path("{identifier}/datasetSchema")
+ @Produces(MediaType.APPLICATION_JSON)
+ public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) {
+ User u = getRequestUser(crc);
+
+ try {
+ String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf)));
+ JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema);
+ return Response.ok(jsonObject).build();
+ } catch (WrappedResponse ex) {
+ Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex);
+ return ex.getResponse();
+ }
+ }
+
+
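A hypothetical usage sketch (not part of this patch) of the two new collection endpoints above; the collection alias, token, and local dataset.json file are placeholders.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class CollectionSchemaSketch {
    public static void main(String[] args) throws Exception {
        String base = "https://demo.example.edu/api/dataverses/root"; // hypothetical collection alias
        String token = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";        // hypothetical API token
        HttpClient http = HttpClient.newHttpClient();

        // Fetch the JSON Schema this collection expects for dataset creation.
        HttpRequest schema = HttpRequest.newBuilder(URI.create(base + "/datasetSchema"))
                .header("X-Dataverse-key", token).GET().build();
        System.out.println(http.send(schema, HttpResponse.BodyHandlers.ofString()).body());

        // Validate a local dataset JSON file against that schema before trying to create the dataset.
        HttpRequest validate = HttpRequest.newBuilder(URI.create(base + "/validateDatasetJson"))
                .header("X-Dataverse-key", token)
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofFile(Path.of("dataset.json"))) // hypothetical local file
                .build();
        System.out.println(http.send(validate, HttpResponse.BodyHandlers.ofString()).body());
    }
}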
@POST
@AuthRequired
@@ -937,7 +976,62 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam(
execCommand(new GetDataverseStorageSizeCommand(req, findDataverseOrDie(dvIdtf), includeCached)))), getRequestUser(crc));
}
+ @GET
+ @AuthRequired
+ @Path("{identifier}/storage/quota")
+ public Response getCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse {
+ try {
+ Long bytesAllocated = execCommand(new GetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf)));
+ if (bytesAllocated != null) {
+ return ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.quota.allocation"),bytesAllocated));
+ }
+ return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.notdefined"));
+ } catch (WrappedResponse ex) {
+ return ex.getResponse();
+ }
+ }
+
+ @POST
+ @AuthRequired
+ @Path("{identifier}/storage/quota/{bytesAllocated}")
+ public Response setCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("bytesAllocated") Long bytesAllocated) throws WrappedResponse {
+ try {
+ execCommand(new SetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf), bytesAllocated));
+ return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.updated"));
+ } catch (WrappedResponse ex) {
+ return ex.getResponse();
+ }
+ }
+
+ @DELETE
+ @AuthRequired
+ @Path("{identifier}/storage/quota")
+ public Response deleteCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse {
+ try {
+ execCommand(new DeleteCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf)));
+ return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.deleted"));
+ } catch (WrappedResponse ex) {
+ return ex.getResponse();
+ }
+ }
+ /**
+ *
+ * @param crc
+ * @param identifier
+ * @return
+ * @throws edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse
+ * @todo: add an optional parameter that would force the recorded storage use
+ * to be recalculated (or should that be a POST version of this API?)
+ */
+ @GET
+ @AuthRequired
+ @Path("{identifier}/storage/use")
+ public Response getCollectionStorageUse(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier) throws WrappedResponse {
+ return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.use"),
+ execCommand(new GetCollectionStorageUseCommand(req, findDataverseOrDie(identifier))))), getRequestUser(crc));
+ }
+
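The collection storage quota and storage use endpoints added above follow the same request pattern; a rough sketch of the corresponding calls, where sendRequest is a hypothetical helper that issues an HTTP request with the X-Dataverse-key header set, and server and alias are placeholders:

    String base = server + "/api/dataverses/" + alias + "/storage";

    sendRequest("POST",   base + "/quota/" + (1024L * 1024L * 1024L)); // allocate a 1 GiB quota
    sendRequest("GET",    base + "/quota");                            // report the allocation (or "not defined")
    sendRequest("GET",    base + "/use");                              // report the recorded storage use
    sendRequest("DELETE", base + "/quota");                            // remove the quota again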
@GET
@AuthRequired
@Path("{identifier}/roles")
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
index af681234e82..89b22b76a7d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -103,8 +103,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
String auxiliaryTag = null;
String auxiliaryType = null;
String auxiliaryFileName = null;
+
// Before we do anything else, check if this download can be handled
// by a redirect to remote storage (only supported on S3, as of 5.4):
+
if (storageIO.downloadRedirectEnabled()) {
// Even if the above is true, there are a few cases where a
@@ -158,7 +160,7 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
}
} else if (dataFile.isTabularData()) {
- // Many separate special cases here.
+ // Many separate special cases here.
if (di.getConversionParam() != null) {
if (di.getConversionParam().equals("format")) {
@@ -179,12 +181,26 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
redirectSupported = false;
}
}
- } else if (!di.getConversionParam().equals("noVarHeader")) {
- // This is a subset request - can't do.
+ } else if (di.getConversionParam().equals("noVarHeader")) {
+ // This will work just fine, if the tab. file is
+ // stored without the var. header. Throw "unavailable"
+ // exception otherwise.
+ // @todo: should we actually drop support for this "noVarHeader" flag?
+ if (dataFile.getDataTable().isStoredWithVariableHeader()) {
+ throw new ServiceUnavailableException();
+ }
+ // ... defaults to redirectSupported = true
+ } else {
+ // This must be a subset request then - can't do.
+ redirectSupported = false;
+ }
+ } else {
+ // "straight" download of the full tab-delimited file.
+ // can redirect, but only if stored with the variable
+ // header already added:
+ if (!dataFile.getDataTable().isStoredWithVariableHeader()) {
redirectSupported = false;
}
- } else {
- redirectSupported = false;
}
}
}
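The net effect of the reworked branches above on redirect eligibility for tabular files can be summarized as a small decision function. This is an illustrative paraphrase only (the "format" conversion branch has its own rules and is left out), with storedWithHeader standing in for dataFile.getDataTable().isStoredWithVariableHeader():

    static boolean tabularRedirectSupported(String conversionParam, boolean storedWithHeader) {
        if (conversionParam == null) {
            // "straight" download of the full tab-delimited file: redirect only if the stored
            // file already includes the variable name header
            return storedWithHeader;
        }
        if ("noVarHeader".equals(conversionParam)) {
            // serviceable (and redirectable) only if the file is stored without the header
            return !storedWithHeader;
        }
        // anything else reaching this point is a subset request and must be generated locally
        return false;
    }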
@@ -206,14 +222,15 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
redirect_url_str = null;
}
}
-
- if (systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList()
- .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) {
+ String driverId = DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier());
+ if (systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) {
if (di.getConversionParam() != null) {
if (di.getConversionParam().equals("format")) {
if ("GlobusTransfer".equals(di.getConversionParamValue())) {
- redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, dataFile);
+                        List<DataFile> downloadDFList = new ArrayList<>(1);
+ downloadDFList.add(dataFile);
+ redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList);
}
}
}
@@ -245,11 +262,16 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
// finally, issue the redirect:
Response response = Response.seeOther(redirect_uri).build();
logger.fine("Issuing redirect to the file location.");
+ // Yes, this throws an exception. It's not an exception
+ // as in, "bummer, something went wrong". This is how a
+ // redirect is produced here!
throw new RedirectionException(response);
}
throw new ServiceUnavailableException();
}
+ // Past this point, this is a locally served/streamed download
+
if (di.getConversionParam() != null) {
// Image Thumbnail and Tabular data conversion:
// NOTE: only supported on local files, as of 4.0.2!
@@ -283,9 +305,14 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
// request any tabular-specific services.
if (di.getConversionParam().equals("noVarHeader")) {
- logger.fine("tabular data with no var header requested");
- storageIO.setNoVarHeader(Boolean.TRUE);
- storageIO.setVarHeader(null);
+ if (!dataFile.getDataTable().isStoredWithVariableHeader()) {
+ logger.fine("tabular data with no var header requested");
+ storageIO.setNoVarHeader(Boolean.TRUE);
+ storageIO.setVarHeader(null);
+ } else {
+ logger.fine("can't serve request for tabular data without varheader, since stored with it");
+ throw new ServiceUnavailableException();
+ }
} else if (di.getConversionParam().equals("format")) {
// Conversions, and downloads of "stored originals" are
// now supported on all DataFiles for which StorageIO
@@ -327,11 +354,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) {
logger.fine("adding variable id " + variable.getId() + " to the list.");
variablePositionIndex.add(variable.getFileOrder());
- if (subsetVariableHeader == null) {
- subsetVariableHeader = variable.getName();
- } else {
- subsetVariableHeader = subsetVariableHeader.concat("\t");
- subsetVariableHeader = subsetVariableHeader.concat(variable.getName());
+ if (!dataFile.getDataTable().isStoredWithVariableHeader()) {
+ subsetVariableHeader = subsetVariableHeader == null
+ ? variable.getName()
+ : subsetVariableHeader.concat("\t" + variable.getName());
}
} else {
logger.warning("variable does not belong to this data file.");
@@ -344,7 +370,17 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
try {
File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp");
TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator();
- tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t");
+
+ long numberOfLines = dataFile.getDataTable().getCaseQuantity();
+ if (dataFile.getDataTable().isStoredWithVariableHeader()) {
+ numberOfLines++;
+ }
+
+ tabularSubsetGenerator.subsetFile(storageIO.getInputStream(),
+ tempSubsetFile.getAbsolutePath(),
+ variablePositionIndex,
+ numberOfLines,
+ "\t");
if (tempSubsetFile.exists()) {
FileInputStream subsetStream = new FileInputStream(tempSubsetFile);
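The adjustment to the line count above is simple arithmetic: the subset generator consumes the stored file line by line, so when the physical file carries the variable-name header it has one more line than the recorded number of data rows. A quick worked example with hypothetical values:

    long caseQuantity = 1000L;          // data rows recorded for the table
    boolean storedWithHeader = true;    // file on disk starts with the variable-name header line
    long numberOfLines = caseQuantity + (storedWithHeader ? 1 : 0);   // 1001 lines to read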
@@ -352,8 +388,11 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize);
logger.fine("successfully created subset output stream.");
- subsetVariableHeader = subsetVariableHeader.concat("\n");
- subsetStreamIO.setVarHeader(subsetVariableHeader);
+
+ if (subsetVariableHeader != null) {
+ subsetVariableHeader = subsetVariableHeader.concat("\n");
+ subsetStreamIO.setVarHeader(subsetVariableHeader);
+ }
String tabularFileName = storageIO.getFileName();
@@ -378,8 +417,13 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
} else {
logger.fine("empty list of extra arguments.");
}
+ // end of tab. data subset case
+ } else if (dataFile.getDataTable().isStoredWithVariableHeader()) {
+ logger.fine("tabular file stored with the var header included, no need to generate it on the fly");
+ storageIO.setNoVarHeader(Boolean.TRUE);
+ storageIO.setVarHeader(null);
}
- }
+ } // end of tab. data file case
if (storageIO == null) {
//throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
index ad24d81d996..5d400ee1438 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java
@@ -49,6 +49,8 @@
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.URLTokenUtil;
+
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
@@ -814,19 +816,17 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P
return error(BAD_REQUEST, "External tool does not have file scope.");
}
ApiToken apiToken = null;
- User u = getRequestUser(crc);
- if (u instanceof AuthenticatedUser) {
- apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u);
- }
+ User user = getRequestUser(crc);
+ apiToken = authSvc.getValidApiTokenForUser(user);
FileMetadata target = fileSvc.findFileMetadata(fmid);
if (target == null) {
return error(BAD_REQUEST, "FileMetadata not found.");
}
- ExternalToolHandler eth = null;
+ URLTokenUtil eth = null;
eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale);
- return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters()))));
+ return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls())));
}
@GET
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java
index 0652539b595..40ce6cd25b7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java
@@ -1,6 +1,5 @@
package edu.harvard.iq.dataverse.api;
-import edu.harvard.iq.dataverse.api.auth.AuthRequired;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -9,8 +8,6 @@
import jakarta.json.JsonValue;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
-import jakarta.ws.rs.container.ContainerRequestContext;
-import jakarta.ws.rs.core.Context;
import jakarta.ws.rs.core.Response;
@Path("info")
@@ -35,30 +32,27 @@ public Response getMaxEmbargoDurationInMonths() {
}
@GET
- @AuthRequired
@Path("version")
- public Response getInfo(@Context ContainerRequestContext crc) {
+ public Response getInfo() {
String versionStr = systemConfig.getVersion(true);
String[] comps = versionStr.split("build",2);
String version = comps[0].trim();
JsonValue build = comps.length > 1 ? Json.createArrayBuilder().add(comps[1].trim()).build().get(0) : JsonValue.NULL;
-
- return response( req -> ok( Json.createObjectBuilder().add("version", version)
- .add("build", build)), getRequestUser(crc));
+ return ok(Json.createObjectBuilder()
+ .add("version", version)
+ .add("build", build));
}
@GET
- @AuthRequired
@Path("server")
- public Response getServer(@Context ContainerRequestContext crc) {
- return response( req -> ok(JvmSettings.FQDN.lookup()), getRequestUser(crc));
+ public Response getServer() {
+ return ok(JvmSettings.FQDN.lookup());
}
@GET
- @AuthRequired
@Path("apiTermsOfUse")
- public Response getTermsOfUse(@Context ContainerRequestContext crc) {
- return response( req -> ok(systemConfig.getApiTermsOfUse()), getRequestUser(crc));
+ public Response getTermsOfUse() {
+ return ok(systemConfig.getApiTermsOfUse());
}
@GET
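With @AuthRequired dropped, these info endpoints are callable anonymously; a minimal sketch, reusing the java.net.http classes from the earlier sketch (the server URL is a placeholder):

    HttpClient client = HttpClient.newHttpClient();
    HttpRequest req = HttpRequest.newBuilder()
            .uri(URI.create("https://demo.dataverse.org/api/info/version")) // placeholder server
            .GET().build();
    // No X-Dataverse-key header is needed any more for /api/info/version, /server, or /apiTermsOfUse.
    System.out.println(client.send(req, HttpResponse.BodyHandlers.ofString()).body());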
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
index 6b48dbf8415..b2696757220 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java
@@ -8,11 +8,11 @@
import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
-import java.io.FileReader;
import java.io.IOException;
+import java.io.InputStream;
import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.List;
@@ -83,26 +83,21 @@ public Response sendDataToHub() {
@Path("{id}/addUsageMetricsFromSushiReport")
public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) {
- JsonObject report;
-
- try (FileReader reader = new FileReader(reportOnDisk)) {
- report = Json.createReader(reader).readObject();
- Dataset dataset;
- try {
- dataset = findDatasetOrDie(id);
-                List<DatasetMetrics> datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset);
- if (!datasetMetrics.isEmpty()) {
- for (DatasetMetrics dm : datasetMetrics) {
- datasetMetricsService.save(dm);
- }
+ try {
+ JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk);
+ Dataset dataset = findDatasetOrDie(id);
+            List<DatasetMetrics> datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset);
+ if (!datasetMetrics.isEmpty()) {
+ for (DatasetMetrics dm : datasetMetrics) {
+ datasetMetricsService.save(dm);
}
- } catch (WrappedResponse ex) {
- Logger.getLogger(MakeDataCountApi.class.getName()).log(Level.SEVERE, null, ex);
- return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage());
}
+ } catch (WrappedResponse ex) {
+ logger.log(Level.SEVERE, null, ex);
+ return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage());
} catch (IOException ex) {
- System.out.print(ex.getMessage());
+ logger.log(Level.WARNING, ex.getMessage());
return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage());
}
String msg = "Dummy Data has been added to dataset " + id;
@@ -113,10 +108,8 @@ public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @Quer
@Path("/addUsageMetricsFromSushiReport")
public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) {
- JsonObject report;
-
- try (FileReader reader = new FileReader(reportOnDisk)) {
- report = Json.createReader(reader).readObject();
+ try {
+ JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk);
+            List<DatasetMetrics> datasetMetrics = datasetMetricsService.parseSushiReport(report, null);
if (!datasetMetrics.isEmpty()) {
@@ -126,7 +119,7 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q
}
} catch (IOException ex) {
- System.out.print(ex.getMessage());
+ logger.log(Level.WARNING, ex.getMessage());
return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage());
}
String msg = "Usage Metrics Data has been added to all datasets from file " + reportOnDisk;
@@ -135,7 +128,7 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q
@POST
@Path("{id}/updateCitationsForDataset")
- public Response updateCitationsForDataset(@PathParam("id") String id) throws MalformedURLException, IOException {
+ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOException {
try {
Dataset dataset = findDatasetOrDie(id);
String persistentId = dataset.getGlobalId().toString();
@@ -158,7 +151,10 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws Mal
logger.warning("Failed to get citations from " + url.toString());
return error(Status.fromStatusCode(status), "Failed to get citations from " + url.toString());
}
- JsonObject report = Json.createReader(connection.getInputStream()).readObject();
+ JsonObject report;
+ try (InputStream inStream = connection.getInputStream()) {
+ report = JsonUtil.getJsonObject(inStream);
+ }
JsonObject links = report.getJsonObject("links");
JsonArray data = report.getJsonArray("data");
Iterator iter = data.iterator();
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java
index c760534ca7b..71e2865ca4d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java
@@ -157,7 +157,9 @@ public Response search(
numResultsPerPage,
true, //SEK get query entities always for search API additional Dataset Information 6300 12/6/2019
geoPoint,
- geoRadius
+ geoRadius,
+ showFacets, // facets are expensive, no need to ask for them if not requested
+ showRelevance // no need for highlights unless requested either
);
} catch (SearchException ex) {
Throwable cause = ex;
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java
index 87be1f14e05..10510013495 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java
@@ -71,5 +71,31 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que
return wr.getResponse();
}
}
+
+ @Path("files/{id}/externalTool/{toolId}")
+ @GET
+ public Response getExternalToolForFileById(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied, @PathParam("toolId") String toolId) {
+ ExternalTool.Type type;
+ try {
+ type = ExternalTool.Type.fromString(typeSupplied);
+ } catch (IllegalArgumentException ex) {
+ return error(BAD_REQUEST, ex.getLocalizedMessage());
+ }
+ try {
+ DataFile dataFile = findDataFileOrDie(idSupplied);
+            List<ExternalTool> datasetTools = externalToolService.findFileToolsByTypeAndContentType(type, dataFile.getContentType());
+ for (ExternalTool tool : datasetTools) {
+ ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey());
+ ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null);
+ JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler);
+ if (externalToolService.meetsRequirements(tool, dataFile) && tool.getId().toString().equals(toolId)) {
+ return ok(toolToJson);
+ }
+ }
+ return error(BAD_REQUEST, "Could not find external tool with id of " + toolId);
+ } catch (WrappedResponse wr) {
+ return wr.getResponse();
+ }
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java
index 05ba150df8e..add43ea2091 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java
@@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil
TabularDataIngest tabDataIngest = null;
try {
- tabDataIngest = ingestPlugin.read(fileInputStream, null);
+ tabDataIngest = ingestPlugin.read(fileInputStream, false, null);
} catch (IOException ingestEx) {
output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage());
return output;
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java
index f8572144236..258661f6495 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java
@@ -3,7 +3,10 @@
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.authorization.users.User;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.util.UrlSignerUtil;
@@ -27,16 +30,18 @@ public class SignedUrlAuthMechanism implements AuthMechanism {
@Inject
protected AuthenticationServiceBean authSvc;
-
+ @Inject
+ protected PrivateUrlServiceBean privateUrlSvc;
+
@Override
public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse {
String signedUrlRequestParameter = getSignedUrlRequestParameter(containerRequestContext);
if (signedUrlRequestParameter == null) {
return null;
}
- AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl(containerRequestContext);
- if (authUser != null) {
- return authUser;
+ User user = getAuthenticatedUserFromSignedUrl(containerRequestContext);
+ if (user != null) {
+ return user;
}
throw new WrappedAuthErrorResponse(RESPONSE_MESSAGE_BAD_SIGNED_URL);
}
@@ -45,8 +50,8 @@ private String getSignedUrlRequestParameter(ContainerRequestContext containerReq
return containerRequestContext.getUriInfo().getQueryParameters().getFirst(SIGNED_URL_TOKEN);
}
- private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) {
- AuthenticatedUser authUser = null;
+ private User getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) {
+ User user = null;
// The signedUrl contains a param telling which user this is supposed to be for.
// We don't trust this. So we lookup that user, and get their API key, and use
// that as a secret in validating the signedURL. If the signature can't be
@@ -54,17 +59,26 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestCont
// we reject the request.
UriInfo uriInfo = containerRequestContext.getUriInfo();
String userId = uriInfo.getQueryParameters().getFirst(SIGNED_URL_USER);
- AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(userId);
- ApiToken userApiToken = authSvc.findApiTokenByUser(targetUser);
+ User targetUser = null;
+ ApiToken userApiToken = null;
+ if (!userId.startsWith(PrivateUrlUser.PREFIX)) {
+ targetUser = authSvc.getAuthenticatedUser(userId);
+ userApiToken = authSvc.findApiTokenByUser((AuthenticatedUser) targetUser);
+ } else {
+ PrivateUrl privateUrl = privateUrlSvc.getPrivateUrlFromDatasetId(Long.parseLong(userId.substring(PrivateUrlUser.PREFIX.length())));
+ userApiToken = new ApiToken();
+ userApiToken.setTokenString(privateUrl.getToken());
+ targetUser = privateUrlSvc.getPrivateUrlUserFromToken(privateUrl.getToken());
+ }
if (targetUser != null && userApiToken != null) {
String signedUrl = URLDecoder.decode(uriInfo.getRequestUri().toString(), StandardCharsets.UTF_8);
String requestMethod = containerRequestContext.getMethod();
String signedUrlSigningKey = JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + userApiToken.getTokenString();
boolean isSignedUrlValid = UrlSignerUtil.isValidUrl(signedUrl, userId, requestMethod, signedUrlSigningKey);
if (isSignedUrlValid) {
- authUser = targetUser;
+ user = targetUser;
}
}
- return authUser;
+ return user;
}
}
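For both user types the validation key is assembled the same way: the optional instance-wide signing secret followed by the token that belongs to the user named in the URL. A minimal sketch of that assembly (illustrative only; the parameter values are placeholders), after which the key is passed to UrlSignerUtil.isValidUrl(signedUrl, userId, requestMethod, key) exactly as above:

    static String signingKeyFor(String userId, String signingSecret,
                                String usersApiToken, String datasetPrivateUrlToken) {
        if (userId.startsWith("!")) { // PrivateUrlUser.PREFIX followed by the dataset id
            // preview/anonymized access: the dataset's private-url token is the secret part
            return signingSecret + datasetPrivateUrlToken;
        }
        // regular accounts: the user's own API token is the secret part
        return signingSecret + usersApiToken;
    }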
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java
index a878720cc39..3f5345d8e0d 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java
@@ -11,7 +11,6 @@
import edu.harvard.iq.dataverse.PermissionServiceBean;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException;
-import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
@@ -19,6 +18,7 @@
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.ConstraintViolationUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -307,9 +307,9 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au
try {
//CreateDataFileResult createDataFilesResponse = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig);
- UserStorageQuota quota = null;
+ UploadSessionQuotaLimit quota = null;
if (systemConfig.isStorageQuotasEnforced()) {
- quota = dataFileService.getUserStorageQuota(user, dataset);
+ quota = dataFileService.getUploadSessionQuotaLimit(dataset);
}
Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvReq, editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, quota, null);
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java
index 496620cd6e8..1c0f5010059 100644
--- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java
@@ -21,10 +21,14 @@
import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
+import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean;
import edu.harvard.iq.dataverse.passwordreset.PasswordResetData;
import edu.harvard.iq.dataverse.passwordreset.PasswordResetServiceBean;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean;
@@ -118,6 +122,9 @@ public class AuthenticationServiceBean {
@EJB
SavedSearchServiceBean savedSearchService;
+ @EJB
+ PrivateUrlServiceBean privateUrlService;
+
@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;
@@ -615,6 +622,7 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord
String identifier = internalUserIdentifier + i;
while ( identifierExists(identifier) ) {
i += 1;
+ identifier = internalUserIdentifier + i;
}
authenticatedUser.setUserIdentifier(identifier);
} else {
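The added line above makes the uniquification loop regenerate its candidate on every pass; before it, the loop kept testing the same string once a collision was found. A standalone sketch of the corrected logic, with a Set standing in for the identifierExists() lookup:

    static String uniquify(String base, java.util.Set<String> taken) {
        int i = 1;
        String candidate = base + i;
        while (taken.contains(candidate)) {
            i += 1;
            candidate = base + i; // the regeneration step that was previously missing
        }
        return candidate;
    }
    // uniquify("jsmith", java.util.Set.of("jsmith1", "jsmith2")) returns "jsmith3"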
@@ -931,14 +939,45 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat
return query.getResultList();
}
- public ApiToken getValidApiTokenForUser(AuthenticatedUser user) {
+ /**
+ * This method gets a valid api token for an AuthenticatedUser, creating a new
+ * token if one doesn't exist or if the token is expired.
+ *
+ * @param user
+ * @return
+ */
+ public ApiToken getValidApiTokenForAuthenticatedUser(AuthenticatedUser user) {
ApiToken apiToken = null;
apiToken = findApiTokenByUser(user);
- if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) {
+ if ((apiToken == null) || apiToken.isExpired()) {
logger.fine("Created apiToken for user: " + user.getIdentifier());
apiToken = generateApiTokenForUser(user);
}
return apiToken;
}
+ /**
+ * Gets a token for an AuthenticatedUser or a PrivateUrlUser. It will create a
+ * new token if needed for an AuthenticatedUser. Note that, for a PrivateUrlUser, this method creates a token
+ * with a temporary AuthenticateUser that only has a userIdentifier - needed in generating signed Urls.
+ * @param user
+ * @return a token or null (i.e. if the user is not an AuthenticatedUser or PrivateUrlUser)
+ */
+
+ public ApiToken getValidApiTokenForUser(User user) {
+ ApiToken apiToken = null;
+ if (user instanceof AuthenticatedUser) {
+ apiToken = getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user);
+ } else if (user instanceof PrivateUrlUser) {
+ PrivateUrlUser privateUrlUser = (PrivateUrlUser) user;
+
+ PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId());
+ apiToken = new ApiToken();
+ apiToken.setTokenString(privateUrl.getToken());
+ AuthenticatedUser au = new AuthenticatedUser();
+ au.setUserIdentifier(privateUrlUser.getIdentifier());
+ apiToken.setAuthenticatedUser(au);
+ }
+ return apiToken;
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java
index 3cbfc3cdcac..b307c655798 100644
--- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java
+++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java
@@ -68,7 +68,8 @@
@NamedQuery( name="AuthenticatedUser.filter",
query="select au from AuthenticatedUser au WHERE ("
+ "LOWER(au.userIdentifier) like LOWER(:query) OR "
- + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query))"),
+ + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query) or "
+ + "lower(au.email) like lower(:query))"),
@NamedQuery( name="AuthenticatedUser.findAdminUser",
query="select au from AuthenticatedUser au WHERE "
+ "au.superuser = true "
diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java
index f64b5c301e7..03f018221fd 100644
--- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java
+++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java
@@ -12,7 +12,7 @@
*/
public class PrivateUrlUser implements User {
- public static final String PREFIX = "#";
+ public static final String PREFIX = "!";
/**
* In the future, this could probably be dvObjectId rather than datasetId,
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java
new file mode 100644
index 00000000000..10ff68a56f3
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java
@@ -0,0 +1,344 @@
+package edu.harvard.iq.dataverse.dataaccess;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.channels.Channel;
+import java.nio.file.Path;
+import java.security.KeyManagementException;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.util.List;
+import java.util.function.Predicate;
+import java.util.logging.Logger;
+
+import javax.net.ssl.SSLContext;
+
+import org.apache.http.client.config.CookieSpecs;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.protocol.HttpClientContext;
+import org.apache.http.config.Registry;
+import org.apache.http.config.RegistryBuilder;
+import org.apache.http.conn.socket.ConnectionSocketFactory;
+import org.apache.http.conn.ssl.NoopHostnameVerifier;
+import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
+import org.apache.http.conn.ssl.TrustAllStrategy;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
+import org.apache.http.ssl.SSLContextBuilder;
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.DvObject;
+
+
+/**
+ * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes
+ * common methods for handling auxiliary files in the configured base store.
+ * @param <T> the type of DvObject handled by the store
+ */
+public abstract class AbstractRemoteOverlayAccessIO<T extends DvObject> extends StorageIO<T> {
+
+ protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO");
+ public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths";
+ static final String BASE_STORE = "base-store";
+ protected static final String SECRET_KEY = "secret-key";
+ static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes";
+ protected static final String REMOTE_STORE_NAME = "remote-store-name";
+ protected static final String REMOTE_STORE_URL = "remote-store-url";
+
+ // Whether Dataverse can access the file bytes
+ // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits
+ static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse";
+
+    protected StorageIO<DvObject> baseStore = null;
+ protected String path = null;
+ protected PoolingHttpClientConnectionManager cm = null;
+ CloseableHttpClient httpclient = null;
+ protected static HttpClientContext localContext = HttpClientContext.create();
+
+ protected int timeout = 1200;
+ protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000)
+ .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000)
+ .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build();
+ protected static boolean trustCerts = false;
+ protected int httpConcurrency = 4;
+
+ public static String getBaseStoreIdFor(String driverId) {
+ return getConfigParamForDriver(driverId, BASE_STORE);
+ }
+
+ public AbstractRemoteOverlayAccessIO() {
+ super();
+ }
+
+ public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) {
+ super(storageLocation, driverId);
+ }
+
+ public AbstractRemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) {
+ super(dvObject, req, driverId);
+ }
+
+ @Override
+ public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException {
+ return baseStore.openAuxChannel(auxItemTag, options);
+ }
+
+ @Override
+ public boolean isAuxObjectCached(String auxItemTag) throws IOException {
+ return baseStore.isAuxObjectCached(auxItemTag);
+ }
+
+ @Override
+ public long getAuxObjectSize(String auxItemTag) throws IOException {
+ return baseStore.getAuxObjectSize(auxItemTag);
+ }
+
+ @Override
+ public Path getAuxObjectAsPath(String auxItemTag) throws IOException {
+ return baseStore.getAuxObjectAsPath(auxItemTag);
+ }
+
+ @Override
+ public void backupAsAux(String auxItemTag) throws IOException {
+ baseStore.backupAsAux(auxItemTag);
+ }
+
+ @Override
+ public void revertBackupAsAux(String auxItemTag) throws IOException {
+ baseStore.revertBackupAsAux(auxItemTag);
+ }
+
+ @Override
+ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException {
+ baseStore.savePathAsAux(fileSystemPath, auxItemTag);
+ }
+
+ @Override
+ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException {
+ baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize);
+ }
+
+ /**
+ * @param inputStream InputStream we want to save
+ * @param auxItemTag String representing this Auxiliary type ("extension")
+ * @throws IOException if anything goes wrong.
+ */
+ @Override
+ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException {
+ baseStore.saveInputStreamAsAux(inputStream, auxItemTag);
+ }
+
+ @Override
+    public List<String> listAuxObjects() throws IOException {
+ return baseStore.listAuxObjects();
+ }
+
+ @Override
+ public void deleteAuxObject(String auxItemTag) throws IOException {
+ baseStore.deleteAuxObject(auxItemTag);
+ }
+
+ @Override
+ public void deleteAllAuxObjects() throws IOException {
+ baseStore.deleteAllAuxObjects();
+ }
+
+ @Override
+ public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException {
+ return baseStore.getAuxFileAsInputStream(auxItemTag);
+ }
+
+ protected int getUrlExpirationMinutes() {
+ String optionValue = getConfigParam(URL_EXPIRATION_MINUTES);
+ if (optionValue != null) {
+ Integer num;
+ try {
+ num = Integer.parseInt(optionValue);
+ } catch (NumberFormatException ex) {
+ num = null;
+ }
+ if (num != null) {
+ return num;
+ }
+ }
+ return 60;
+ }
+
+ public CloseableHttpClient getSharedHttpClient() {
+ if (httpclient == null) {
+ try {
+ initHttpPool();
+ httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build();
+
+ } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) {
+ logger.warning(ex.getMessage());
+ }
+ }
+ return httpclient;
+ }
+
+ private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException {
+ if (trustCerts) {
+ // use the TrustSelfSignedStrategy to allow Self Signed Certificates
+ SSLContext sslContext;
+ SSLConnectionSocketFactory connectionFactory;
+
+ sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build();
+ // create an SSL Socket Factory to use the SSLContext with the trust self signed
+ // certificate strategy
+ // and allow all hosts verifier.
+ connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE);
+
+            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
+ .register("https", connectionFactory).build();
+ cm = new PoolingHttpClientConnectionManager(registry);
+ } else {
+ cm = new PoolingHttpClientConnectionManager();
+ }
+ cm.setDefaultMaxPerRoute(httpConcurrency);
+ cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20);
+ }
+
+ @Override
+ abstract public long retrieveSizeFromMedia();
+
+ @Override
+ public boolean exists() {
+ logger.fine("Exists called");
+ return (retrieveSizeFromMedia() != -1);
+ }
+
+ @Override
+    public List<String> cleanUp(Predicate<String> filter, boolean dryRun) throws IOException {
+ return baseStore.cleanUp(filter, dryRun);
+ }
+
+ @Override
+ public String getStorageLocation() throws IOException {
+ String fullStorageLocation = dvObject.getStorageIdentifier();
+ logger.fine("storageidentifier: " + fullStorageLocation);
+ int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR);
+ if (driverIndex >= 0) {
+ fullStorageLocation = fullStorageLocation
+ .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length());
+ }
+ if (this.getDvObject() instanceof Dataset) {
+ throw new IOException("AbstractRemoteOverlayAccessIO: Datasets are not a supported dvObject");
+ } else if (this.getDvObject() instanceof DataFile) {
+ fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation;
+ } else if (dvObject instanceof Dataverse) {
+ throw new IOException("AbstractRemoteOverlayAccessIO: Dataverses are not a supported dvObject");
+ }
+ logger.fine("fullStorageLocation: " + fullStorageLocation);
+ return fullStorageLocation;
+ }
+ protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException {
+
+ if (baseStore == null) {
+ String baseDriverId = getBaseStoreIdFor(driverId);
+ String fullStorageLocation = null;
+ String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE,
+ DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER);
+
+ if (dvObject instanceof Dataset) {
+ baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId);
+ } else {
+ if (this.getDvObject() != null) {
+ fullStorageLocation = getStoragePath();
+
+ // S3 expects :///
+ switch (baseDriverType) {
+ case DataAccess.S3:
+ fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
+ + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/"
+ + fullStorageLocation;
+ break;
+ case DataAccess.FILE:
+ fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
+ + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files")
+ + "/" + fullStorageLocation;
+ break;
+ default:
+ logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: "
+ + getConfigParamForDriver(baseDriverId, StorageIO.TYPE));
+ throw new IOException("Not supported");
+ }
+
+ } else if (storageLocation != null) {
+ // :////
+ // remoteDriverId:// is removed if coming through directStorageIO
+ int index = storageLocation.indexOf(DataAccess.SEPARATOR);
+ if (index > 0) {
+ storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length());
+ }
+ // The base store needs the baseStoreIdentifier and not the relative URL (if it exists)
+ int endOfId = storageLocation.indexOf("//");
+ fullStorageLocation = (endOfId>-1) ? storageLocation.substring(0, endOfId) : storageLocation;
+
+ switch (baseDriverType) {
+ case DataAccess.S3:
+ fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
+ + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/"
+ + fullStorageLocation;
+ break;
+ case DataAccess.FILE:
+ fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
+ + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files")
+ + "/" + fullStorageLocation;
+ break;
+ default:
+ logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: "
+ + getConfigParamForDriver(baseDriverId, StorageIO.TYPE));
+ throw new IOException("Not supported");
+ }
+ }
+ baseStore = DataAccess.getDirectStorageIO(fullStorageLocation);
+ }
+ if (baseDriverType.contentEquals(DataAccess.S3)) {
+            ((S3AccessIO<DvObject>) baseStore).setMainDriver(false);
+ }
+ }
+ remoteStoreName = getConfigParam(REMOTE_STORE_NAME);
+ try {
+ remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL));
+ } catch (MalformedURLException mfue) {
+ logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId);
+ }
+ }
+
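As a concrete, hypothetical illustration of the base-store locations composed above: for a base driver with id basestore1 that is either an S3 store with bucket mybucket or a file store using the default /tmp/files directory, and a DataFile whose getStoragePath() result is the usual authority/identifier/storageIdentifier string:

    String storagePath  = "10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece"; // hypothetical
    String s3Location   = "basestore1://mybucket/" + storagePath;    // <baseDriverId>://<bucket>/<path>
    String fileLocation = "basestore1:///tmp/files/" + storagePath;  // <baseDriverId>://<directory>/<path>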
+ protected String getStoragePath() throws IOException {
+ String fullStoragePath = dvObject.getStorageIdentifier();
+ logger.fine("storageidentifier: " + fullStoragePath);
+ int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR);
+ if (driverIndex >= 0) {
+ fullStoragePath = fullStoragePath
+ .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length());
+ }
+ int suffixIndex = fullStoragePath.indexOf("//");
+ if (suffixIndex >= 0) {
+ fullStoragePath = fullStoragePath.substring(0, suffixIndex);
+ }
+ if (getDvObject() instanceof Dataset) {
+ fullStoragePath = getDataset().getAuthorityForFileStorage() + "/"
+ + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath;
+ } else if (getDvObject() instanceof DataFile) {
+ fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/"
+ + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath;
+ } else if (dvObject instanceof Dataverse) {
+ throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject");
+ }
+ logger.fine("fullStoragePath: " + fullStoragePath);
+ return fullStoragePath;
+ }
+
+ public static boolean isNotDataverseAccessible(String storeId) {
+ return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(storeId, FILES_NOT_ACCESSIBLE_BY_DATAVERSE));
+ }
+
+
+
+}
\ No newline at end of file
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java
index d046fa4661d..a1bcbe49327 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java
@@ -48,6 +48,7 @@ public DataAccess() {
public static final String S3 = "s3";
static final String SWIFT = "swift";
static final String REMOTE = "remote";
+ public static final String GLOBUS = "globus";
static final String TMP = "tmp";
public static final String SEPARATOR = "://";
//Default to "file" is for tests only
@@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data
return new SwiftAccessIO<>(dvObject, req, storageDriverId);
case REMOTE:
return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId);
+ case GLOBUS:
+ return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId);
case TMP:
throw new IOException(
"DataAccess IO attempted on a temporary file that hasn't been permanently saved yet.");
@@ -129,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation)
return new SwiftAccessIO<>(storageLocation, storageDriverId);
case REMOTE:
return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId);
+ case GLOBUS:
+ return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId);
default:
logger.warning("Could not find storage driver for: " + fullStorageLocation);
throw new IOException("getDirectStorageIO: Unsupported storage method.");
@@ -148,19 +153,41 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) {
}
public static String getStorageIdFromLocation(String location) {
- if(location.contains(SEPARATOR)) {
- //It's a full location with a driverId, so strip and reapply the driver id
- //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it
- return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1);
- }
- return location.substring(location.lastIndexOf('/')+1);
+ if (location.contains(SEPARATOR)) {
+ // It's a full location with a driverId, so strip and reapply the driver id
+            // Note that this will strip the bucketname out (which s3 uses) but the
+            // S3IOStorage class knows to re-insert it
+ return location.substring(0, location.indexOf(SEPARATOR) + 3)
+ + location.substring(location.lastIndexOf('/') + 1);
+ }
+ return location.substring(location.lastIndexOf('/') + 1);
+ }
+
+ /** Changes storageidentifiers of the form
+ * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece
+ * and
+ * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece
+ * @param id
+ * @param dataset
+ * @return
+ */
+ public static String getLocationFromStorageId(String id, Dataset dataset) {
+ String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/";
+ if (id.contains(SEPARATOR)) {
+ // It's a full location with a driverId, so strip and reapply the driver id
+            // Note that this will strip the bucketname out (which s3 uses) but the
+            // S3IOStorage class knows to re-insert it
+ return id.substring(0, id.indexOf(SEPARATOR) + 3) + path
+ + id.substring(id.lastIndexOf('/') + 1);
+ }
+ return path + id.substring(id.lastIndexOf('/') + 1);
}
public static String getDriverType(String driverId) {
if(driverId.isEmpty() || driverId.equals("tmp")) {
return "tmp";
}
- return System.getProperty("dataverse.files." + driverId + ".type", "Undefined");
+ return StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined");
}
//This
@@ -168,7 +195,7 @@ public static String getDriverPrefix(String driverId) throws IOException {
if(driverId.isEmpty() || driverId.equals("tmp")) {
return "tmp" + SEPARATOR;
}
- String storageType = System.getProperty("dataverse.files." + driverId + ".type", "Undefined");
+ String storageType = StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined");
switch(storageType) {
case FILE:
return FileAccessIO.getDriverPrefix(driverId);
@@ -236,7 +263,8 @@ public static StorageIO createNewStorageIO(T dvObject, S
storageIO = new S3AccessIO<>(dvObject, null, storageDriverId);
break;
case REMOTE:
- storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ;
+ case GLOBUS:
+ storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ;
break;
default:
logger.warning("Could not find storage driver for: " + storageTag);
@@ -369,9 +397,35 @@ public static boolean isValidDirectStorageIdentifier(String storageId) {
return S3AccessIO.isValidIdentifier(driverId, storageId);
case REMOTE:
return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId);
+ case GLOBUS:
+ return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId);
default:
logger.warning("Request to validate for storage driver: " + driverId);
}
return false;
}
+
+
+
+ public static String getNewStorageIdentifier(String driverId) {
+ String storageType = DataAccess.getDriverType(driverId);
+ if (storageType.equals("tmp") || storageType.equals("Undefined")) {
+ return null;
+ }
+ switch (storageType) {
+ case FILE:
+ return FileAccessIO.getNewIdentifier(driverId);
+ case SWIFT:
+ return SwiftAccessIO.getNewIdentifier(driverId);
+ case S3:
+ return S3AccessIO.getNewIdentifier(driverId);
+ case REMOTE:
+ return RemoteOverlayAccessIO.getNewIdentifier(driverId);
+ case GLOBUS:
+ return GlobusOverlayAccessIO.getNewIdentifier(driverId);
+ default:
+ logger.warning("Request to validate for storage driver: " + driverId);
+ }
+ return null;
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
index d95df1567bd..26637ec5742 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java
@@ -35,8 +35,6 @@
import java.util.List;
import java.util.function.Predicate;
import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import java.util.stream.Collectors;
// Dataverse imports:
@@ -55,6 +53,7 @@
public class FileAccessIO<T extends DvObject> extends StorageIO<T> {
private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO");
+ public static final String DIRECTORY = "directory";
public FileAccessIO() {
@@ -115,13 +114,14 @@ public void open (DataAccessOption... options) throws IOException {
this.setInputStream(fin);
setChannel(fin.getChannel());
- this.setSize(getLocalFileSize());
+ this.setSize(retrieveSizeFromMedia());
if (dataFile.getContentType() != null
&& dataFile.getContentType().equals("text/tab-separated-values")
&& dataFile.isTabularData()
&& dataFile.getDataTable() != null
- && (!this.noVarHeader())) {
+ && (!this.noVarHeader())
+ && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
String varHeaderLine = generateVariableHeader(datavariables);
@@ -506,21 +506,6 @@ public void delete() throws IOException {
// Auxilary helper methods, filesystem access-specific:
- private long getLocalFileSize () {
- long fileSize = -1;
-
- try {
- File testFile = getFileSystemPath().toFile();
- if (testFile != null) {
- fileSize = testFile.length();
- }
- return fileSize;
- } catch (IOException ex) {
- return -1;
- }
-
- }
-
public FileInputStream openLocalFileAsInputStream () {
FileInputStream in;
@@ -595,7 +580,7 @@ private String getDatasetDirectory() throws IOException {
protected String getFilesRootDirectory() {
- String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory", "/tmp/files");
+ String filesRootDirectory = getConfigParam(DIRECTORY, "/tmp/files");
return filesRootDirectory;
}
@@ -742,4 +727,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE
return toDelete;
}
+ @Override
+ public long retrieveSizeFromMedia() {
+ long fileSize = -1;
+ try {
+ File testFile = getFileSystemPath().toFile();
+ if (testFile != null) {
+ fileSize = testFile.length();
+ }
+ return fileSize;
+ } catch (IOException ex) {
+ return -1;
+ }
+ }
+
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
new file mode 100644
index 00000000000..8bed60d8302
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java
@@ -0,0 +1,78 @@
+package edu.harvard.iq.dataverse.dataaccess;
+
+import jakarta.json.Json;
+import jakarta.json.JsonArray;
+import jakarta.json.JsonArrayBuilder;
+
+public interface GlobusAccessibleStore {
+
+ //Whether Dataverse manages access controls for the Globus endpoint or not.
+ static final String MANAGED = "managed";
+ /*
+ * transfer and reference endpoint formats:
+ *
+ * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma
+ */
+ static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath";
+ static final String GLOBUS_TOKEN = "globus-token";
+
+ public static boolean isDataverseManaged(String driverId) {
+ return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(driverId, MANAGED));
+ }
+
+ public static String getTransferEnpointWithPath(String driverId) {
+ return StorageIO.getConfigParamForDriver(driverId, GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH);
+ }
+
+ public static String getTransferEndpointId(String driverId) {
+ String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH);
+ int pathStart = endpointWithBasePath.indexOf("/");
+ return pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath;
+ }
+
+ public static String getTransferPath(String driverId) {
+ String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH);
+ int pathStart = endpointWithBasePath.indexOf("/");
+ return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "";
+
+ }
+
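For example, a hypothetical transfer-endpoint-with-basepath value of "d8c42580-6528-4605-9ad8-116a61982644/dataverse/files" is split by the two methods above as:

    String endpointWithBasePath = "d8c42580-6528-4605-9ad8-116a61982644/dataverse/files"; // hypothetical
    int pathStart = endpointWithBasePath.indexOf("/");
    String endpointId = endpointWithBasePath.substring(0, pathStart); // the Globus endpoint id
    String basePath   = endpointWithBasePath.substring(pathStart);    // "/dataverse/files"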
+ public static JsonArray getReferenceEndpointsWithPaths(String driverId) {
+ String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*");
+ JsonArrayBuilder builder = Json.createArrayBuilder();
+        for (int i = 0; i < endpoints.length; i++) {
+            builder.add(endpoints[i]);
+        }
+        return builder.build();
+    }
+/*
+ * StorageIdentifier format:
+ * <globusDriverId>://<baseStorageIdentifier>//<relativePath>
+ *
+ * Storage location:
+ * <endpointWithBasePath>/<relativePath>
+ *
+ * Internal StorageIdentifier format:
+ * <globusDriverId>://<baseStorageIdentifier>
+ *
+ * Storage location:
+ * <endpointWithBasePath>/<dataset authority>/<dataset identifier>/<baseStorageIdentifier>
+ *
+ */
+public class GlobusOverlayAccessIO<T extends DvObject> extends AbstractRemoteOverlayAccessIO<T> implements GlobusAccessibleStore {
+ private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO");
+
+ /*
+ * If this is set to true, the store supports Globus transfer in and
+ * Dataverse/the globus app manage file locations, access controls, deletion,
+ * etc.
+ */
+ private Boolean dataverseManaged = null;
+
+ private String relativeDirectoryPath;
+
+ private String endpointPath;
+
+ private String filename;
+
+ private String[] allowedEndpoints;
+ private String endpoint;
+
+ public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException {
+ super(dvObject, req, driverId);
+ configureGlobusEndpoints();
+ configureStores(req, driverId, null);
+ logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier());
+ path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2);
+ validatePath(path);
+
+ logger.fine("Relative path: " + path);
+ }
+
+
+ public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException {
+ this.driverId = driverId;
+ configureGlobusEndpoints();
+ configureStores(null, driverId, storageLocation);
+ if (isManaged()) {
+ String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation);
+ path = parts[1];
+ } else {
+ this.setIsLocalFile(false);
+ path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2);
+ validatePath(path);
+ logger.fine("Referenced path: " + path);
+ }
+ }
+
+ private boolean isManaged() {
+ if(dataverseManaged==null) {
+ dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId);
+ }
+ return dataverseManaged;
+ }
+
+ private String retrieveGlobusAccessToken() {
+ String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN);
+
+
+ AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken);
+ return accessToken.getOtherTokens().get(0).getAccessToken();
+ }
+
+ private void parsePath() {
+ int filenameStart = path.lastIndexOf("/") + 1;
+ String endpointWithBasePath = null;
+ if (!isManaged()) {
+ endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints);
+ } else {
+ endpointWithBasePath = allowedEndpoints[0];
+ }
+ //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3);
+ int pathStart = endpointWithBasePath.indexOf("/");
+ logger.fine("endpointWithBasePath: " + endpointWithBasePath);
+ endpointPath = "/" + (pathStart > 0 ? endpointWithBasePath.substring(pathStart + 1) : "");
+ logger.fine("endpointPath: " + endpointPath);
+
+
+ if (isManaged() && (dvObject!=null)) {
+
+ Dataset ds = null;
+ if (dvObject instanceof Dataset) {
+ ds = (Dataset) dvObject;
+ } else if (dvObject instanceof DataFile) {
+ ds = ((DataFile) dvObject).getOwner();
+ }
+ relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier();
+ } else {
+ relativeDirectoryPath = "";
+ }
+ if (filenameStart > 0) {
+ relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart);
+ }
+ logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath);
+ filename = path.substring(filenameStart);
+ endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath;
+
+
+ }
+
+ private static String findMatchingEndpoint(String path, String[] allowedEndpoints) {
+        for (int i = 0; i < allowedEndpoints.length; i++) {
+            if (path.startsWith(allowedEndpoints[i])) {
+                return allowedEndpoints[i];
+            }
+        }
+        return null;
+    }
+
+            if (dataFile.getFilesize() >= 0) {
+ this.setSize(dataFile.getFilesize());
+ } else {
+ logger.fine("Setting size");
+ this.setSize(retrieveSizeFromMedia());
+ }
+ // Only applies for the S3 Connector case (where we could have run an ingest)
+ if (dataFile.getContentType() != null
+ && dataFile.getContentType().equals("text/tab-separated-values")
+ && dataFile.isTabularData()
+ && dataFile.getDataTable() != null
+ && (!this.noVarHeader())
+ && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
+
+                    List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
+ String varHeaderLine = generateVariableHeader(datavariables);
+ this.setVarHeader(varHeaderLine);
+ }
+
+ }
+
+ this.setMimeType(dataFile.getContentType());
+
+ try {
+ this.setFileName(dataFile.getFileMetadata().getLabel());
+ } catch (Exception ex) {
+ this.setFileName("unknown");
+ }
+ } else if (dvObject instanceof Dataset) {
+ throw new IOException(
+ "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet");
+ } else if (dvObject instanceof Dataverse) {
+ throw new IOException(
+ "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet");
+ }
+ }
+
+ @Override
+ public Path getFileSystemPath() throws IOException {
+ throw new UnsupportedDataAccessOperationException(
+                this.getClass().getName() + ": getFileSystemPath() not implemented in this storage driver.");
+ }
+
+ @Override
+ public void savePath(Path fileSystemPath) throws IOException {
+ throw new UnsupportedDataAccessOperationException(
+ this.getClass().getName() + ": savePath() not implemented in this storage driver.");
+ }
+
+ @Override
+ public void saveInputStream(InputStream inputStream) throws IOException {
+ throw new UnsupportedDataAccessOperationException(
+                this.getClass().getName() + ": saveInputStream() not implemented in this storage driver.");
+ }
+
+ @Override
+ public void saveInputStream(InputStream inputStream, Long filesize) throws IOException {
+ throw new UnsupportedDataAccessOperationException(
+                this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver.");
+ }
+
+}
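Side note for reviewers: the transfer-endpoint-with-basepath parsing added in GlobusAccessibleStore splits the configured value on its first "/" into a Globus endpoint id and a base path. A small self-contained sketch of that behavior; the driver id "globus" and the endpoint UUID below are made-up examples, not values from this changeset:

    // Hypothetical config: dataverse.files.globus.transfer-endpoint-with-basepath =
    //   "d8c42580-6528-4605-9ad8-116a61982644/dataverse/files"
    String endpointWithBasePath = "d8c42580-6528-4605-9ad8-116a61982644/dataverse/files";
    int pathStart = endpointWithBasePath.indexOf("/");
    // endpoint id: everything before the first slash
    String endpointId = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath;
    // base path: the first slash and everything after it (empty if there is no slash)
    String basePath = pathStart > 0 ? endpointWithBasePath.substring(pathStart) : "";
    // endpointId -> "d8c42580-6528-4605-9ad8-116a61982644", basePath -> "/dataverse/files"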
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java
index 2b4aed3a9a5..2de37174a3b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java
@@ -48,6 +48,7 @@
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
//import org.primefaces.util.Base64;
@@ -110,19 +111,30 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s
}
if (isThumbnailCached(storageIO, size)) {
+ logger.fine("Found cached thumbnail for " + file.getId());
return true;
}
+ return generateThumbnail(file, storageIO, size);
- logger.fine("Checking for thumbnail, file type: " + file.getContentType());
+ }
- if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) {
- return generateImageThumbnail(storageIO, size);
- } else if (file.getContentType().equalsIgnoreCase("application/pdf")) {
- return generatePDFThumbnail(storageIO, size);
+ private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) {
+ logger.log(Level.FINE, (file.isPreviewImageFail() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId());
+ // Don't try to generate if there have been failures:
+ if (!file.isPreviewImageFail()) {
+ boolean thumbnailGenerated = false;
+ if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) {
+ thumbnailGenerated = generateImageThumbnail(storageIO, size);
+ } else if (file.getContentType().equalsIgnoreCase("application/pdf")) {
+ thumbnailGenerated = generatePDFThumbnail(storageIO, size);
+ }
+ if (!thumbnailGenerated) {
+ logger.fine("No thumbnail generated for " + file.getId());
+ }
+ return thumbnailGenerated;
}
return false;
-
}
// Note that this method works on ALL file types for which thumbnail
@@ -184,6 +196,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s
// We rely on ImageMagick to convert PDFs; so if it's not installed,
// better give up right away:
if (!isImageMagickInstalled()) {
+ logger.fine("Couldn't find ImageMagick");
return false;
}
@@ -206,35 +219,34 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s
tempFilesRequired = true;
} catch (IOException ioex) {
+ logger.warning(ioex.getMessage());
// this on the other hand is likely a fatal condition :(
return false;
}
if (tempFilesRequired) {
- ReadableByteChannel pdfFileChannel;
-
+ InputStream inputStream = null;
try {
storageIO.open();
- //inputStream = storageIO.getInputStream();
- pdfFileChannel = storageIO.getReadChannel();
+ inputStream = storageIO.getInputStream();
} catch (Exception ioex) {
logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier());
return false;
}
File tempFile;
- FileChannel tempFileChannel = null;
+ OutputStream outputStream = null;
try {
tempFile = File.createTempFile("tempFileToRescale", ".tmp");
- tempFileChannel = new FileOutputStream(tempFile).getChannel();
-
- tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize());
+ outputStream = new FileOutputStream(tempFile);
+ //Reads/transfers all bytes from the input stream to the output stream.
+ inputStream.transferTo(outputStream);
} catch (IOException ioex) {
logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file.");
return false;
} finally {
- IOUtils.closeQuietly(tempFileChannel);
- IOUtils.closeQuietly(pdfFileChannel);
+ IOUtils.closeQuietly(inputStream);
+ IOUtils.closeQuietly(outputStream);
}
sourcePdfFile = tempFile;
}
@@ -436,16 +448,8 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) {
if (cachedThumbnailChannel == null) {
logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size);
- // try to generate, if not available:
- boolean generated = false;
- if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) {
- generated = generateImageThumbnail(storageIO, size);
- } else if (file.getContentType().equalsIgnoreCase("application/pdf")) {
- generated = generatePDFThumbnail(storageIO, size);
- }
-
- if (generated) {
- // try to open again:
+ // try to generate, if not available and hasn't failed before
+ if(generateThumbnail(file, storageIO, size)) {
try {
cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size);
} catch (Exception ioEx) {
@@ -757,7 +761,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size)
try {
fileSize = new File(fileLocation).length();
} catch (Exception ex) {
- //
+ logger.warning("Can't open file: " + fileLocation);
}
if (fileSize == 0 || fileSize > sizeLimit) {
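The PDF-to-temp-file copy above now uses InputStream.transferTo(OutputStream), available since Java 9, instead of channel transfers that require knowing the source size up front. A minimal stand-alone sketch of the same pattern (the source path is a placeholder, not a file from this changeset):

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Paths;

    public class TransferToSketch {
        public static void main(String[] args) throws IOException {
            File tempFile = File.createTempFile("tempFileToRescale", ".tmp");
            try (InputStream in = Files.newInputStream(Paths.get("source.pdf"));
                 OutputStream out = new FileOutputStream(tempFile)) {
                // copies every byte from in to out; no need to know the stream size in advance
                in.transferTo(out);
            }
        }
    }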
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
index be6f9df0254..de392b74cca 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java
@@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE
throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver.");
}
+ @Override
+ public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException {
+ throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver.");
+ }
+
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
index 66c6a4cc2ee..bca70259cb7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java
@@ -11,105 +11,77 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
-import java.net.URL;
import java.nio.channels.Channel;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.Path;
-import java.security.KeyManagementException;
-import java.security.KeyStoreException;
-import java.security.NoSuchAlgorithmException;
import java.util.List;
-import java.util.function.Predicate;
-import java.util.logging.Logger;
import org.apache.http.Header;
-import org.apache.http.client.config.CookieSpecs;
-import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpDelete;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpHead;
-import org.apache.http.client.protocol.HttpClientContext;
-import org.apache.http.config.Registry;
-import org.apache.http.config.RegistryBuilder;
-import org.apache.http.conn.socket.ConnectionSocketFactory;
-import org.apache.http.conn.ssl.NoopHostnameVerifier;
-import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
-import org.apache.http.conn.ssl.TrustAllStrategy;
-import org.apache.http.impl.client.CloseableHttpClient;
-import org.apache.http.impl.client.HttpClients;
-import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HTTP;
-import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
-import javax.net.ssl.SSLContext;
-
/**
* @author qqmyers
- * @param <T> what it stores
*/
/*
* Remote Overlay Driver
*
* StorageIdentifier format:
- * <driverId>://<baseStorageIdentifier>//<urlPath>
+ * <driverId>://<baseStorageIdentifier>//<relativePath>
+ *
+ * baseUrl: http(s)://<host(:port)/basePath>
+ */
-public class RemoteOverlayAccessIO<T extends DvObject> extends StorageIO<T> {
-
- private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO");
-
-    private StorageIO<DvObject> baseStore = null;
- private String urlPath = null;
- private String baseUrl = null;
+public class RemoteOverlayAccessIO<T extends DvObject> extends AbstractRemoteOverlayAccessIO<T> {
- private static HttpClientContext localContext = HttpClientContext.create();
- private PoolingHttpClientConnectionManager cm = null;
- CloseableHttpClient httpclient = null;
- private int timeout = 1200;
- private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000)
- .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000)
- .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build();
- private static boolean trustCerts = false;
- private int httpConcurrency = 4;
+ // A single baseUrl of the form http(s):// where this store can reference data
+ static final String BASE_URL = "base-url";
+ String baseUrl = null;
+ public RemoteOverlayAccessIO() {
+ super();
+ }
+
public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException {
super(dvObject, req, driverId);
this.setIsLocalFile(false);
+ configureRemoteEndpoints();
configureStores(req, driverId, null);
logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier());
- urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2);
- validatePath(urlPath);
-
- logger.fine("Base URL: " + urlPath);
+ path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2);
+ validatePath(path);
+
+ logger.fine("Relative path: " + path);
}
public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException {
super(null, null, driverId);
this.setIsLocalFile(false);
+ configureRemoteEndpoints();
configureStores(null, driverId, storageLocation);
- urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2);
- validatePath(urlPath);
- logger.fine("Base URL: " + urlPath);
+ path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2);
+ validatePath(path);
+ logger.fine("Relative path: " + path);
}
-
- private void validatePath(String path) throws IOException {
+
+ protected void validatePath(String relPath) throws IOException {
try {
- URI absoluteURI = new URI(baseUrl + "/" + urlPath);
- if(!absoluteURI.normalize().toString().startsWith(baseUrl)) {
+ URI absoluteURI = new URI(baseUrl + "/" + relPath);
+ if (!absoluteURI.normalize().toString().startsWith(baseUrl)) {
throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url");
}
- } catch(URISyntaxException use) {
+ } catch (URISyntaxException use) {
throw new IOException("Could not interpret storageidentifier in remote store " + this.driverId);
}
- }
-
+ }
@Override
public void open(DataAccessOption... options) throws IOException {
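The validatePath() rewrite above keeps the same safeguard as before: the relative path is appended to the store's base URL and URI-normalized, and anything that no longer starts with the base URL is rejected. A tiny illustration with invented values:

    import java.net.URI;
    import java.net.URISyntaxException;

    public class ValidatePathSketch {
        public static void main(String[] args) throws URISyntaxException {
            String baseUrl = "https://example.org/files";   // stand-in for dataverse.files.<id>.base-url
            String good = "dataset1/readme.txt";
            String evil = "../../etc/passwd";                // tries to climb out of the base path
            System.out.println(new URI(baseUrl + "/" + good).normalize());
            // still starts with https://example.org/files -> accepted
            System.out.println(new URI(baseUrl + "/" + evil).normalize());
            // no longer starts with the base URL after normalization -> validatePath() would throw
        }
    }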
@@ -150,10 +122,14 @@ public void open(DataAccessOption... options) throws IOException {
this.setSize(dataFile.getFilesize());
} else {
logger.fine("Setting size");
- this.setSize(getSizeFromHttpHeader());
+ this.setSize(retrieveSizeFromMedia());
}
- if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values")
- && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) {
+ if (dataFile.getContentType() != null
+ && dataFile.getContentType().equals("text/tab-separated-values")
+ && dataFile.isTabularData()
+ && dataFile.getDataTable() != null
+ && (!this.noVarHeader())
+ && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
                List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
String varHeaderLine = generateVariableHeader(datavariables);
@@ -171,18 +147,17 @@ public void open(DataAccessOption... options) throws IOException {
}
} else if (dvObject instanceof Dataset) {
throw new IOException(
- "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet");
+ "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet");
} else if (dvObject instanceof Dataverse) {
throw new IOException(
- "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet");
- } else {
- this.setSize(getSizeFromHttpHeader());
+ "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet");
}
}
- private long getSizeFromHttpHeader() {
+ @Override
+ public long retrieveSizeFromMedia() {
long size = -1;
- HttpHead head = new HttpHead(baseUrl + "/" + urlPath);
+ HttpHead head = new HttpHead(baseUrl + "/" + path);
try {
CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext);
@@ -224,12 +199,12 @@ public InputStream getInputStream() throws IOException {
break;
default:
logger.warning("Response from " + get.getURI().toString() + " was " + code);
- throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code);
+ throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code);
}
} catch (Exception e) {
logger.warning(e.getMessage());
e.printStackTrace();
- throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage());
+ throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage());
}
setChannel(Channels.newChannel(super.getInputStream()));
@@ -260,13 +235,13 @@ public void delete() throws IOException {
throw new IOException("Direct Access IO must be used to permanently delete stored file objects");
}
try {
- HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath);
+ HttpDelete del = new HttpDelete(baseUrl + "/" + path);
CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext);
try {
int code = response.getStatusLine().getStatusCode();
switch (code) {
case 200:
- logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath);
+ logger.fine("Sent DELETE for " + baseUrl + "/" + path);
default:
logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code);
}
@@ -275,7 +250,7 @@ public void delete() throws IOException {
}
} catch (Exception e) {
logger.warning(e.getMessage());
- throw new IOException("Error deleting: " + baseUrl + "/" + urlPath);
+ throw new IOException("Error deleting: " + baseUrl + "/" + path);
}
@@ -284,104 +259,12 @@ public void delete() throws IOException {
}
- @Override
- public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException {
- return baseStore.openAuxChannel(auxItemTag, options);
- }
-
- @Override
- public boolean isAuxObjectCached(String auxItemTag) throws IOException {
- return baseStore.isAuxObjectCached(auxItemTag);
- }
-
- @Override
- public long getAuxObjectSize(String auxItemTag) throws IOException {
- return baseStore.getAuxObjectSize(auxItemTag);
- }
-
- @Override
- public Path getAuxObjectAsPath(String auxItemTag) throws IOException {
- return baseStore.getAuxObjectAsPath(auxItemTag);
- }
-
- @Override
- public void backupAsAux(String auxItemTag) throws IOException {
- baseStore.backupAsAux(auxItemTag);
- }
-
- @Override
- public void revertBackupAsAux(String auxItemTag) throws IOException {
- baseStore.revertBackupAsAux(auxItemTag);
- }
-
- @Override
- // this method copies a local filesystem Path into this DataAccess Auxiliary
- // location:
- public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException {
- baseStore.savePathAsAux(fileSystemPath, auxItemTag);
- }
-
- @Override
- public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException {
- baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize);
- }
-
- /**
- * @param inputStream InputStream we want to save
- * @param auxItemTag String representing this Auxiliary type ("extension")
- * @throws IOException if anything goes wrong.
- */
- @Override
- public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException {
- baseStore.saveInputStreamAsAux(inputStream, auxItemTag);
- }
-
- @Override
-    public List<String> listAuxObjects() throws IOException {
- return baseStore.listAuxObjects();
- }
-
- @Override
- public void deleteAuxObject(String auxItemTag) throws IOException {
- baseStore.deleteAuxObject(auxItemTag);
- }
-
- @Override
- public void deleteAllAuxObjects() throws IOException {
- baseStore.deleteAllAuxObjects();
- }
-
- @Override
- public String getStorageLocation() throws IOException {
- String fullStorageLocation = dvObject.getStorageIdentifier();
- logger.fine("storageidentifier: " + fullStorageLocation);
- int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR);
- if(driverIndex >=0) {
- fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length());
- }
- if (this.getDvObject() instanceof Dataset) {
- throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject");
- } else if (this.getDvObject() instanceof DataFile) {
- fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation;
- } else if (dvObject instanceof Dataverse) {
- throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject");
- }
- logger.fine("fullStorageLocation: " + fullStorageLocation);
- return fullStorageLocation;
- }
-
@Override
public Path getFileSystemPath() throws UnsupportedDataAccessOperationException {
throw new UnsupportedDataAccessOperationException(
"RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it.");
}
- @Override
- public boolean exists() {
- logger.fine("Exists called");
- return (getSizeFromHttpHeader() != -1);
- }
-
@Override
public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException {
throw new UnsupportedDataAccessOperationException(
@@ -394,20 +277,15 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep
"RemoteOverlayAccessIO: there are no output Streams associated with S3 objects.");
}
- @Override
- public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException {
- return baseStore.getAuxFileAsInputStream(auxItemTag);
- }
-
@Override
public boolean downloadRedirectEnabled() {
- String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect");
+ String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT);
if ("true".equalsIgnoreCase(optionValue)) {
return true;
}
return false;
}
-
+
public boolean downloadRedirectEnabled(String auxObjectTag) {
return baseStore.downloadRedirectEnabled(auxObjectTag);
}
@@ -418,11 +296,11 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary
// ToDo - support remote auxiliary Files
if (auxiliaryTag == null) {
- String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key");
+ String secretKey = getConfigParam(SECRET_KEY);
if (secretKey == null) {
- return baseUrl + "/" + urlPath;
+ return baseUrl + "/" + path;
} else {
- return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET",
+ return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET",
secretKey);
}
} else {
@@ -430,27 +308,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary
}
}
- int getUrlExpirationMinutes() {
- String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes");
- if (optionValue != null) {
- Integer num;
- try {
- num = Integer.parseInt(optionValue);
- } catch (NumberFormatException ex) {
- num = null;
- }
- if (num != null) {
- return num;
- }
- }
- return 60;
- }
- private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException {
- baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url");
+    /** This method configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but
+ * the derived GlobusOverlayAccessIO can support multiple endpoints.
+ * @throws IOException
+ */
+ protected void configureRemoteEndpoints() throws IOException {
+ baseUrl = getConfigParam(BASE_URL);
if (baseUrl == null) {
- throw new IOException("dataverse.files." + this.driverId + ".base-url is required");
- } else {
+ //Will accept the first endpoint using the newer setting
+ baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0];
+ if (baseUrl == null) {
+ throw new IOException("dataverse.files." + this.driverId + ".base-url is required");
+ }
+ }
+ if (baseUrl != null) {
try {
new URI(baseUrl);
} catch (Exception e) {
@@ -460,180 +332,42 @@ private void configureStores(DataAccessRequest req, String driverId, String stor
}
}
-
- if (baseStore == null) {
- String baseDriverId = getBaseStoreIdFor(driverId);
- String fullStorageLocation = null;
- String baseDriverType = System.getProperty("dataverse.files." + baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER);
-
- if(dvObject instanceof Dataset) {
- baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId);
- } else {
- if (this.getDvObject() != null) {
- fullStorageLocation = getStoragePath();
-
- // S3 expects :///
- switch (baseDriverType) {
- case DataAccess.S3:
- fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
- + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/"
- + fullStorageLocation;
- break;
- case DataAccess.FILE:
- fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
- + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/"
- + fullStorageLocation;
- break;
- default:
- logger.warning("Not Implemented: RemoteOverlay store with base store type: "
- + System.getProperty("dataverse.files." + baseDriverId + ".type"));
- throw new IOException("Not implemented");
- }
-
- } else if (storageLocation != null) {
- // :////
- //remoteDriverId:// is removed if coming through directStorageIO
- int index = storageLocation.indexOf(DataAccess.SEPARATOR);
- if(index > 0) {
- storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length());
- }
- //THe base store needs the baseStoreIdentifier and not the relative URL
- fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//"));
-
- switch (baseDriverType) {
- case DataAccess.S3:
- fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
- + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/"
- + fullStorageLocation;
- break;
- case DataAccess.FILE:
- fullStorageLocation = baseDriverId + DataAccess.SEPARATOR
- + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/"
- + fullStorageLocation;
- break;
- default:
- logger.warning("Not Implemented: RemoteOverlay store with base store type: "
- + System.getProperty("dataverse.files." + baseDriverId + ".type"));
- throw new IOException("Not implemented");
- }
- }
- baseStore = DataAccess.getDirectStorageIO(fullStorageLocation);
- }
- if (baseDriverType.contentEquals(DataAccess.S3)) {
-                ((S3AccessIO<DvObject>) baseStore).setMainDriver(false);
- }
- }
- remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name");
- try {
- remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url"));
- } catch(MalformedURLException mfue) {
- logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId);
- }
- }
-
- //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required.
- private String getStoragePath() throws IOException {
- String fullStoragePath = dvObject.getStorageIdentifier();
- logger.fine("storageidentifier: " + fullStoragePath);
- int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR);
- if(driverIndex >=0) {
- fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length());
- }
- int suffixIndex = fullStoragePath.indexOf("//");
- if(suffixIndex >=0) {
- fullStoragePath = fullStoragePath.substring(0, suffixIndex);
- }
- if (this.getDvObject() instanceof Dataset) {
- fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/"
- + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath;
- } else if (this.getDvObject() instanceof DataFile) {
- fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/"
- + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath;
- }else if (dvObject instanceof Dataverse) {
- throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject");
- }
- logger.fine("fullStoragePath: " + fullStoragePath);
- return fullStoragePath;
- }
-
- public CloseableHttpClient getSharedHttpClient() {
- if (httpclient == null) {
- try {
- initHttpPool();
- httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build();
-
- } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) {
- logger.warning(ex.getMessage());
- }
- }
- return httpclient;
- }
-
- private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException {
- if (trustCerts) {
- // use the TrustSelfSignedStrategy to allow Self Signed Certificates
- SSLContext sslContext;
- SSLConnectionSocketFactory connectionFactory;
-
- sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build();
- // create an SSL Socket Factory to use the SSLContext with the trust self signed
- // certificate strategy
- // and allow all hosts verifier.
- connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE);
-
-            Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
- .register("https", connectionFactory).build();
- cm = new PoolingHttpClientConnectionManager(registry);
- } else {
- cm = new PoolingHttpClientConnectionManager();
- }
- cm.setDefaultMaxPerRoute(httpConcurrency);
- cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20);
}
@Override
public void savePath(Path fileSystemPath) throws IOException {
throw new UnsupportedDataAccessOperationException(
- "RemoteOverlayAccessIO: savePath() not implemented in this storage driver.");
+ this.getClass().getName() + ": savePath() not implemented in this storage driver.");
}
@Override
public void saveInputStream(InputStream inputStream) throws IOException {
throw new UnsupportedDataAccessOperationException(
- "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver.");
+ this.getClass().getName() + ": saveInputStream() not implemented in this storage driver.");
}
@Override
public void saveInputStream(InputStream inputStream, Long filesize) throws IOException {
throw new UnsupportedDataAccessOperationException(
- "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver.");
+ this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver.");
}
- protected static boolean isValidIdentifier(String driverId, String storageId) {
+ static boolean isValidIdentifier(String driverId, String storageId) {
String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2);
- String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url");
+ String baseUrl = getConfigParamForDriver(driverId, BASE_URL);
try {
URI absoluteURI = new URI(baseUrl + "/" + urlPath);
- if(!absoluteURI.normalize().toString().startsWith(baseUrl)) {
+ if (!absoluteURI.normalize().toString().startsWith(baseUrl)) {
logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId);
return false;
}
- } catch(URISyntaxException use) {
+ } catch (URISyntaxException use) {
logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId);
return false;
}
return true;
}
-
- public static String getBaseStoreIdFor(String driverId) {
- return System.getProperty("dataverse.files." + driverId + ".base-store");
- }
-
- @Override
-    public List<String> cleanUp(Predicate<String> filter, boolean dryRun) throws IOException {
- return baseStore.cleanUp(filter, dryRun);
- }
}
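A compressed sketch of the base-url fallback that configureRemoteEndpoints() introduces above: the legacy per-store base-url is still honored, and otherwise the first entry of the newer reference-endpoints-with-basepaths list is used. The driver id "demo" and the direct System.getProperty() lookups are stand-ins for the store's getConfigParam() calls (and the sketch adds a null guard before splitting, which the diff's version does not):

    // Illustrative only; property names follow the dataverse.files.<driverId>.* convention used in this PR.
    String baseUrl = System.getProperty("dataverse.files.demo.base-url");
    if (baseUrl == null) {
        String endpoints = System.getProperty("dataverse.files.demo.reference-endpoints-with-basepaths");
        if (endpoints != null) {
            // newer multi-endpoint setting: the remote overlay store only uses the first entry
            baseUrl = endpoints.split("\\s*,\\s*")[0];
        }
    }
    if (baseUrl == null) {
        throw new IllegalStateException("dataverse.files.demo.base-url is required");
    }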
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
index e3c6bfede7c..c2143bd4789 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -4,6 +4,7 @@
import com.amazonaws.ClientConfiguration;
import com.amazonaws.HttpMethod;
import com.amazonaws.SdkClientException;
+import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
@@ -57,9 +58,11 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
+import java.util.Optional;
import java.util.Random;
import java.util.function.Predicate;
import java.util.logging.Logger;
@@ -88,6 +91,16 @@ public class S3AccessIO extends StorageIO {
private static final Config config = ConfigProvider.getConfig();
private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO");
+ static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes";
+ static final String CUSTOM_ENDPOINT_URL = "custom-endpoint-url";
+ static final String PROXY_URL = "proxy-url";
+ static final String BUCKET_NAME = "bucket-name";
+ static final String MIN_PART_SIZE = "min-part-size";
+ static final String CUSTOM_ENDPOINT_REGION = "custom-endpoint-region";
+ static final String PATH_STYLE_ACCESS = "path-style-access";
+ static final String PAYLOAD_SIGNING = "payload-signing";
+ static final String CHUNKED_ENCODING = "chunked-encoding";
+ static final String PROFILE = "profile";
private boolean mainDriver = true;
@@ -103,19 +116,18 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) {
minPartSize = getMinPartSize(driverId);
s3=getClient(driverId);
tm=getTransferManager(driverId);
- endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", "");
- proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url", "");
+ endpoint = getConfigParam(CUSTOM_ENDPOINT_URL, "");
+ proxy = getConfigParam(PROXY_URL, "");
if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) {
logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified");
}
- //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened.
- try {
- if (bucketName == null || !s3.doesBucketExistV2(bucketName)) {
- throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets.");
- }
- } catch (SdkClientException sce) {
- throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage());
- }
+
+ // FWIW: There used to be a check here to see if the bucket exists.
+ // It was very redundant (checking every time we access any file) and didn't do
+ // much but potentially make the failure (in the unlikely case a bucket doesn't
+ // exist/just disappeared) happen slightly earlier (here versus at the first
+ // file/metadata access).
+
} catch (Exception e) {
throw new AmazonClientException(
"Cannot instantiate a S3 client; check your AWS credentials and region",
@@ -207,20 +219,14 @@ public void open(DataAccessOption... options) throws IOException {
if (isReadAccess) {
- key = getMainFileKey();
- ObjectMetadata objectMetadata = null;
- try {
- objectMetadata = s3.getObjectMetadata(bucketName, key);
- } catch (SdkClientException sce) {
- throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")");
- }
- this.setSize(objectMetadata.getContentLength());
+ this.setSize(retrieveSizeFromMedia());
if (dataFile.getContentType() != null
&& dataFile.getContentType().equals("text/tab-separated-values")
&& dataFile.isTabularData()
&& dataFile.getDataTable() != null
- && (!this.noVarHeader())) {
+ && (!this.noVarHeader())
+ && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
            List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
String varHeaderLine = generateVariableHeader(datavariables);
@@ -849,7 +855,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S
@Override
public boolean downloadRedirectEnabled() {
- String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect");
+ String optionValue = getConfigParam(DOWNLOAD_REDIRECT);
if ("true".equalsIgnoreCase(optionValue)) {
return true;
}
@@ -1073,7 +1079,7 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(String globalId, String s
}
int getUrlExpirationMinutes() {
- String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes");
+ String optionValue = getConfigParam(URL_EXPIRATION_MINUTES);
if (optionValue != null) {
Integer num;
try {
@@ -1089,7 +1095,7 @@ int getUrlExpirationMinutes() {
}
private static String getBucketName(String driverId) {
- return System.getProperty("dataverse.files." + driverId + ".bucket-name");
+ return getConfigParamForDriver(driverId, BUCKET_NAME);
}
private static long getMinPartSize(String driverId) {
@@ -1097,7 +1103,7 @@ private static long getMinPartSize(String driverId) {
// (minimum allowed is 5*1024**2 but it probably isn't worth the complexity starting at ~5MB. Also - confirmed that they use base 2 definitions)
long min = 5 * 1024 * 1024l;
- String partLength = System.getProperty("dataverse.files." + driverId + ".min-part-size");
+ String partLength = getConfigParamForDriver(driverId, MIN_PART_SIZE);
try {
if (partLength != null) {
long val = Long.parseLong(partLength);
@@ -1146,12 +1152,12 @@ private static AmazonS3 getClient(String driverId) {
* Pass in a URL pointing to your S3 compatible storage.
* For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html
*/
- String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", "");
+ String s3CEUrl = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_URL, "");
/**
* Pass in a region to use for SigV4 signing of requests.
* Defaults to "dataverse" as it is not relevant for custom S3 implementations.
*/
- String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse");
+ String s3CERegion = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_REGION, "dataverse");
// if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones.
if (!s3CEUrl.isEmpty()) {
@@ -1161,7 +1167,7 @@ private static AmazonS3 getClient(String driverId) {
* Pass in a boolean value if path style access should be used within the S3 client.
* Anything but case-insensitive "true" will lead to value of false, which is default value, too.
*/
- Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false"));
+ Boolean s3pathStyleAccess = Boolean.parseBoolean(getConfigParamForDriver(driverId, PATH_STYLE_ACCESS, "false"));
// some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false
s3CB.withPathStyleAccessEnabled(s3pathStyleAccess);
@@ -1169,41 +1175,70 @@ private static AmazonS3 getClient(String driverId) {
* Pass in a boolean value if payload signing should be used within the S3 client.
* Anything but case-insensitive "true" will lead to value of false, which is default value, too.
*/
- Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false"));
+ Boolean s3payloadSigning = Boolean.parseBoolean(getConfigParamForDriver(driverId, PAYLOAD_SIGNING,"false"));
/**
* Pass in a boolean value if chunked encoding should not be used within the S3 client.
* Anything but case-insensitive "false" will lead to value of true, which is default value, too.
*/
- Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".chunked-encoding","true"));
+ Boolean s3chunkedEncoding = Boolean.parseBoolean(getConfigParamForDriver(driverId, CHUNKED_ENCODING,"true"));
// Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false
s3CB.setPayloadSigningEnabled(s3payloadSigning);
// Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true
// Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled
s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding);
- /**
- * Pass in a string value if this storage driver should use a non-default AWS S3 profile.
- * The default is "default" which should work when only one profile exists.
+ /** Configure credentials for the S3 client. There are multiple mechanisms available.
+ * Role-based/instance credentials are globally defined while the other mechanisms (profile, static)
+ * are defined per store. The logic below assures that
+ * * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials.
+ * * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials,
+ * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and
+ * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found.
*/
- String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default");
- ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile);
-
- // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env
- // vars or system properties to provide these, but use the secrets config source provided by Payara.
- AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
- new BasicAWSCredentials(
- config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""),
- config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class).orElse("")
- ));
-
- //Add role-based provider as in the default provider chain
- InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance();
+
+        ArrayList<AWSCredentialsProvider> providers = new ArrayList<>();
+
+ String s3profile = getConfigParamForDriver(driverId, PROFILE);
+ boolean allowInstanceCredentials = true;
+ // Assume that instance credentials should not be used if the profile is
+ // actually set for this store or if static creds are provided (below).
+ if (s3profile != null) {
+ allowInstanceCredentials = false;
+ }
+ // Try to retrieve credentials via Microprofile Config API, too. For production
+ // use, you should not use env vars or system properties to provide these, but
+ // use the secrets config source provided by Payara.
+        Optional<String> accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class);
+        Optional<String> secretKey = config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class);
+ if (accessKey.isPresent() && secretKey.isPresent()) {
+ allowInstanceCredentials = false;
+ AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider(
+ new BasicAWSCredentials(
+ accessKey.get(),
+ secretKey.get()));
+ providers.add(staticCredentials);
+ } else if (s3profile == null) {
+ //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred).
+ s3profile = "default";
+ }
+ if (s3profile != null) {
+ providers.add(new ProfileCredentialsProvider(s3profile));
+ }
+
+ if (allowInstanceCredentials) {
+ // Add role-based provider as in the default provider chain
+ providers.add(InstanceProfileCredentialsProvider.getInstance());
+ }
// Add all providers to chain - the first working provider will be used
- // (role-based is first in the default cred provider chain, so we're just
+ // (role-based is first in the default cred provider chain (if no profile or
+ // static creds are explicitly set for the store), so we're just
// reproducing that, then profile, then static credentials as the fallback)
- AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials);
+
+ // As the order is the reverse of how we added providers, we reverse the list here
+ Collections.reverse(providers);
+ AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers);
s3CB.setCredentials(providerChain);
-
+
// let's build the client :-)
AmazonS3 client = s3CB.build();
driverClientMap.put(driverId, client);
@@ -1385,4 +1420,20 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE
}
return toDelete;
}
+
+ @Override
+ public long retrieveSizeFromMedia() throws IOException {
+ key = getMainFileKey();
+ ObjectMetadata objectMetadata = null;
+ try {
+ objectMetadata = s3.getObjectMetadata(bucketName, key);
+ } catch (SdkClientException sce) {
+ throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")");
+ }
+ return objectMetadata.getContentLength();
+ }
+
+ public static String getNewIdentifier(String driverId) {
+ return driverId + DataAccess.SEPARATOR + getConfigParamForDriver(driverId, BUCKET_NAME) + ":" + FileUtil.generateStorageIdentifier();
+ }
}
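To restate the credential precedence that the reworked S3 client setup above establishes (per-store static keys and/or an explicit profile exclude the global role/instance credentials, an explicit profile wins over static keys, and the "default" profile is only consulted when nothing store-specific is set), here is a self-contained sketch; the parameters a caller would pass are placeholders, not configuration from this changeset:

    import com.amazonaws.auth.AWSCredentialsProvider;
    import com.amazonaws.auth.AWSCredentialsProviderChain;
    import com.amazonaws.auth.AWSStaticCredentialsProvider;
    import com.amazonaws.auth.BasicAWSCredentials;
    import com.amazonaws.auth.InstanceProfileCredentialsProvider;
    import com.amazonaws.auth.profile.ProfileCredentialsProvider;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class CredentialChainSketch {
        static AWSCredentialsProviderChain buildChain(String accessKey, String secretKey, String profile) {
            List<AWSCredentialsProvider> providers = new ArrayList<>();
            boolean allowInstanceCredentials = (profile == null);
            if (accessKey != null && secretKey != null) {
                allowInstanceCredentials = false;
                providers.add(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)));
            } else if (profile == null) {
                // fall back to the "default" profile only when nothing store-specific is configured
                profile = "default";
            }
            if (profile != null) {
                providers.add(new ProfileCredentialsProvider(profile));
            }
            if (allowInstanceCredentials) {
                providers.add(InstanceProfileCredentialsProvider.getInstance());
            }
            // providers were appended lowest-priority-first; reverse so the chain tries them in the intended order
            Collections.reverse(providers);
            return new AWSCredentialsProviderChain(providers);
        }
    }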
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
index d33f8f5e5bd..51cdecf64a0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java
@@ -20,12 +20,12 @@
package edu.harvard.iq.dataverse.dataaccess;
-
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
+import edu.harvard.iq.dataverse.util.FileUtil;
import java.io.IOException;
import java.io.InputStream;
@@ -43,7 +43,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
/**
*
* @author Leonid Andreev
@@ -52,18 +51,27 @@
public abstract class StorageIO<T extends DvObject> {
+ static final String INGEST_SIZE_LIMIT = "ingestsizelimit";
+ static final String PUBLIC = "public";
+ static final String TYPE = "type";
+ static final String UPLOAD_REDIRECT = "upload-redirect";
+ static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band";
+ protected static final String DOWNLOAD_REDIRECT = "download-redirect";
+ protected static final String DATAVERSE_INACCESSIBLE = "dataverse-inaccessible";
+
+
public StorageIO() {
}
-
+
public StorageIO(String storageLocation, String driverId) {
- this.driverId=driverId;
+ this.driverId = driverId;
}
public StorageIO(T dvObject, DataAccessRequest req, String driverId) {
this.dvObject = dvObject;
this.req = req;
- this.driverId=driverId;
+ this.driverId = driverId;
if (this.req == null) {
this.req = new DataAccessRequest();
}
@@ -72,18 +80,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) {
}
}
-
-
// Abstract methods to be implemented by the storage drivers:
public abstract void open(DataAccessOption... option) throws IOException;
protected boolean isReadAccess = false;
protected boolean isWriteAccess = false;
- //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes.
- //Currently, this is just used to warn users at upload time rather than disable restriction/embargo.
+ // A public store is one in which files may be accessible outside Dataverse and
+ // therefore accessible without regard to Dataverse's access controls related to
+ // restriction and embargoes.
+ // Currently, this is just used to warn users at upload time rather than disable
+ // restriction/embargo.
    static protected Map<String, Boolean> driverPublicAccessMap = new HashMap<String, Boolean>();
-
+
public boolean canRead() {
return isReadAccess;
}
@@ -94,115 +103,118 @@ public boolean canWrite() {
public abstract String getStorageLocation() throws IOException;
- // This method will return a Path, if the storage method is a
- // local filesystem. Otherwise should throw an IOException.
+ // This method will return a Path, if the storage method is a
+ // local filesystem. Otherwise should throw an IOException.
public abstract Path getFileSystemPath() throws IOException;
-
- public abstract boolean exists() throws IOException;
-
+
+ public abstract boolean exists() throws IOException;
+
public abstract void delete() throws IOException;
-
+
// this method for copies a local Path (for ex., a
// temp file, into this DataAccess location):
public abstract void savePath(Path fileSystemPath) throws IOException;
-
+
// same, for an InputStream:
/**
- * This method copies a local InputStream into this DataAccess location.
- * Note that the S3 driver implementation of this abstract method is problematic,
- * because S3 cannot save an object of an unknown length. This effectively
- * nullifies any benefits of streaming; as we cannot start saving until we
- * have read the entire stream.
- * One way of solving this would be to buffer the entire stream as byte[],
- * in memory, then save it... Which of course would be limited by the amount
- * of memory available, and thus would not work for streams larger than that.
- * So we have eventually decided to save save the stream to a temp file, then
- * save to S3. This is slower, but guaranteed to work on any size stream.
- * An alternative we may want to consider is to not implement this method
- * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
- * similarly to how we handle attempts to open OutputStreams, in this and the
- * Swift driver.
- * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
+ * This method copies a local InputStream into this DataAccess location. Note
+ * that the S3 driver implementation of this abstract method is problematic,
+ * because S3 cannot save an object of an unknown length. This effectively
+ * nullifies any benefits of streaming; as we cannot start saving until we have
+ * read the entire stream. One way of solving this would be to buffer the entire
+ * stream as byte[], in memory, then save it... Which of course would be limited
+ * by the amount of memory available, and thus would not work for streams larger
+     * than that. So we have eventually decided to save the stream to a temp
+ * file, then save to S3. This is slower, but guaranteed to work on any size
+ * stream. An alternative we may want to consider is to not implement this
+ * method in the S3 driver, and make it throw the
+ * UnsupportedDataAccessOperationException, similarly to how we handle attempts
+ * to open OutputStreams, in this and the Swift driver. (Not an issue in either
+ * FileAccessIO or SwiftAccessIO implementations)
*
* @param inputStream InputStream we want to save
- * @param auxItemTag String representing this Auxiliary type ("extension")
+ * @param auxItemTag String representing this Auxiliary type ("extension")
* @throws IOException if anything goes wrong.
- */
+ */
public abstract void saveInputStream(InputStream inputStream) throws IOException;
+
public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException;
-
+
// Auxiliary File Management: (new as of 4.0.2!)
-
+
// An "auxiliary object" is an abstraction of the traditional DVN/Dataverse
- // mechanism of storing extra files related to the man StudyFile/DataFile -
- // such as "saved original" and cached format conversions for tabular files,
- // thumbnails for images, etc. - in physical files with the same file
- // name but various reserved extensions.
-
- //This function retrieves auxiliary files related to datasets, and returns them as inputstream
- public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ;
-
+    // mechanism of storing extra files related to the main StudyFile/DataFile -
+ // such as "saved original" and cached format conversions for tabular files,
+ // thumbnails for images, etc. - in physical files with the same file
+ // name but various reserved extensions.
+
+ // This function retrieves auxiliary files related to datasets, and returns them
+ // as inputstream
+ public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException;
+
public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException;
-
- public abstract long getAuxObjectSize(String auxItemTag) throws IOException;
-
- public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException;
-
- public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException;
-
- public abstract void backupAsAux(String auxItemTag) throws IOException;
-
- public abstract void revertBackupAsAux(String auxItemTag) throws IOException;
-
- // this method copies a local filesystem Path into this DataAccess Auxiliary location:
+
+ public abstract long getAuxObjectSize(String auxItemTag) throws IOException;
+
+ public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException;
+
+ public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException;
+
+ public abstract void backupAsAux(String auxItemTag) throws IOException;
+
+ public abstract void revertBackupAsAux(String auxItemTag) throws IOException;
+
+ // this method copies a local filesystem Path into this DataAccess Auxiliary
+ // location:
public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException;
-
+
/**
- * This method copies a local InputStream into this DataAccess Auxiliary location.
- * Note that the S3 driver implementation of this abstract method is problematic,
- * because S3 cannot save an object of an unknown length. This effectively
- * nullifies any benefits of streaming; as we cannot start saving until we
- * have read the entire stream.
- * One way of solving this would be to buffer the entire stream as byte[],
- * in memory, then save it... Which of course would be limited by the amount
- * of memory available, and thus would not work for streams larger than that.
- * So we have eventually decided to save save the stream to a temp file, then
- * save to S3. This is slower, but guaranteed to work on any size stream.
- * An alternative we may want to consider is to not implement this method
- * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
- * similarly to how we handle attempts to open OutputStreams, in this and the
- * Swift driver.
- * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
+ * This method copies a local InputStream into this DataAccess Auxiliary
+ * location. Note that the S3 driver implementation of this abstract method is
+ * problematic, because S3 cannot save an object of unknown length. This
+ * effectively nullifies any benefit of streaming: we cannot start saving until
+ * we have read the entire stream. One way of solving this would be to buffer
+ * the entire stream as a byte[] in memory and then save it, which of course
+ * would be limited by the amount of memory available and thus would not work
+ * for streams larger than that. So we have eventually decided to save the
+ * stream to a temp file first, then save that file to S3. This is slower, but
+ * guaranteed to work on a stream of any size. An alternative we may want to
+ * consider is to not implement this method in the S3 driver, and make it throw
+ * UnsupportedDataAccessOperationException, similarly to how we handle attempts
+ * to open OutputStreams, in this and the Swift driver. (Not an issue in either
+ * the FileAccessIO or SwiftAccessIO implementations.)
*
* @param inputStream InputStream we want to save
- * @param auxItemTag String representing this Auxiliary type ("extension")
+ * @param auxItemTag String representing this Auxiliary type ("extension")
* @throws IOException if anything goes wrong.
- */
- public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException;
- public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException;
-
- public abstract List<String> listAuxObjects() throws IOException;
-
- public abstract void deleteAuxObject(String auxItemTag) throws IOException;
-
+ */
+ public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException;
+
+ public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize)
+ throws IOException;
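+
+ // A minimal sketch (not part of this API) of the temp-file approach described
+ // in the javadoc above, reusing the savePathAsAux() method declared earlier;
+ // an S3-backed implementation might do something along these lines:
+ //
+ // public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException {
+ //     java.nio.file.Path tempPath = java.nio.file.Files.createTempFile("aux", null);
+ //     try {
+ //         // copy the stream of unknown length to local disk first, so that the
+ //         // total size is known before the upload to S3 starts:
+ //         java.nio.file.Files.copy(inputStream, tempPath, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
+ //         savePathAsAux(tempPath, auxItemTag);
+ //     } finally {
+ //         java.nio.file.Files.deleteIfExists(tempPath);
+ //     }
+ // }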
+
+ public abstract List<String> listAuxObjects() throws IOException;
+
+ public abstract void deleteAuxObject(String auxItemTag) throws IOException;
+
public abstract void deleteAllAuxObjects() throws IOException;
private DataAccessRequest req;
private InputStream in = null;
- private OutputStream out;
+ private OutputStream out;
protected Channel channel;
protected DvObject dvObject;
protected String driverId;
- /*private int status;*/
+ /* private int status; */
private long size;
/**
- * Where in the file to seek to when reading (default is zero bytes, the
- * start of the file).
+ * Where in the file to seek to when reading (default is zero bytes, the start
+ * of the file).
*/
private long offset;
-
+
private String mimeType;
private String fileName;
private String varHeader;
@@ -215,8 +227,8 @@ public boolean canWrite() {
private String swiftContainerName;
private boolean isLocalFile = false;
- /*private boolean isRemoteAccess = false;*/
- /*private boolean isHttpAccess = false;*/
+ /* private boolean isRemoteAccess = false; */
+ /* private boolean isHttpAccess = false; */
private boolean noVarHeader = false;
// For remote downloads:
@@ -229,13 +241,14 @@ public boolean canWrite() {
private String remoteUrl;
protected String remoteStoreName = null;
protected URL remoteStoreUrl = null;
-
+
// For HTTP-based downloads:
- /*private GetMethod method = null;
- private Header[] responseHeaders;*/
+ /*
+ * private GetMethod method = null; private Header[] responseHeaders;
+ */
// getters:
-
+
public Channel getChannel() throws IOException {
return channel;
}
@@ -255,16 +268,15 @@ public ReadableByteChannel getReadChannel() throws IOException {
return (ReadableByteChannel) channel;
}
-
- public DvObject getDvObject()
- {
+
+ public DvObject getDvObject() {
return dvObject;
}
-
+
public DataFile getDataFile() {
return (DataFile) dvObject;
}
-
+
public Dataset getDataset() {
return (Dataset) dvObject;
}
@@ -277,9 +289,9 @@ public DataAccessRequest getRequest() {
return req;
}
- /*public int getStatus() {
- return status;
- }*/
+ /*
+ * public int getStatus() { return status; }
+ */
public long getSize() {
return size;
@@ -292,9 +304,9 @@ public long getOffset() {
public InputStream getInputStream() throws IOException {
return in;
}
-
+
public OutputStream getOutputStream() throws IOException {
- return out;
+ return out;
}
public String getMimeType() {
@@ -317,23 +329,23 @@ public String getRemoteUrl() {
return remoteUrl;
}
- public String getTemporarySwiftUrl(){
+ public String getTemporarySwiftUrl() {
return temporarySwiftUrl;
}
-
+
public String getTempUrlExpiry() {
return tempUrlExpiry;
}
-
+
public String getTempUrlSignature() {
return tempUrlSignature;
}
-
+
public String getSwiftFileName() {
return swiftFileName;
}
- public String getSwiftContainerName(){
+ public String getSwiftContainerName() {
return swiftContainerName;
}
@@ -344,34 +356,32 @@ public String getRemoteStoreName() {
public URL getRemoteStoreUrl() {
return remoteStoreUrl;
}
-
- /*public GetMethod getHTTPMethod() {
- return method;
- }
- public Header[] getResponseHeaders() {
- return responseHeaders;
- }*/
+ /*
+ * public GetMethod getHTTPMethod() { return method; }
+ *
+ * public Header[] getResponseHeaders() { return responseHeaders; }
+ */
public boolean isLocalFile() {
return isLocalFile;
}
-
- // "Direct Access" StorageIO is used to access a physical storage
- // location not associated with any dvObject. (For example, when we
- // are deleting a physical file left behind by a DataFile that's
- // already been deleted from the database).
+
+ // "Direct Access" StorageIO is used to access a physical storage
+ // location not associated with any dvObject. (For example, when we
+ // are deleting a physical file left behind by a DataFile that's
+ // already been deleted from the database).
public boolean isDirectAccess() {
- return dvObject == null;
+ return dvObject == null;
}
- /*public boolean isRemoteAccess() {
- return isRemoteAccess;
- }*/
+ /*
+ * public boolean isRemoteAccess() { return isRemoteAccess; }
+ */
- /*public boolean isHttpAccess() {
- return isHttpAccess;
- }*/
+ /*
+ * public boolean isHttpAccess() { return isHttpAccess; }
+ */
public boolean isDownloadSupported() {
return isDownloadSupported;
@@ -398,9 +408,9 @@ public void setRequest(DataAccessRequest dar) {
req = dar;
}
- /*public void setStatus(int s) {
- status = s;
- }*/
+ /*
+ * public void setStatus(int s) { status = s; }
+ */
public void setSize(long s) {
size = s;
@@ -421,11 +431,11 @@ public void setOffset(long offset) throws IOException {
public void setInputStream(InputStream is) {
in = is;
}
-
+
public void setOutputStream(OutputStream os) {
- out = os;
- }
-
+ out = os;
+ }
+
public void setChannel(Channel c) {
channel = c;
}
@@ -450,45 +460,46 @@ public void setRemoteUrl(String u) {
remoteUrl = u;
}
- public void setTemporarySwiftUrl(String u){
+ public void setTemporarySwiftUrl(String u) {
temporarySwiftUrl = u;
}
-
- public void setTempUrlExpiry(Long u){
+
+ public void setTempUrlExpiry(Long u) {
tempUrlExpiry = String.valueOf(u);
}
-
+
public void setSwiftFileName(String u) {
swiftFileName = u;
}
-
- public void setTempUrlSignature(String u){
+
+ public void setTempUrlSignature(String u) {
tempUrlSignature = u;
}
- public void setSwiftContainerName(String u){
+ public void setSwiftContainerName(String u) {
swiftContainerName = u;
}
- /*public void setHTTPMethod(GetMethod hm) {
- method = hm;
- }*/
+ /*
+ * public void setHTTPMethod(GetMethod hm) { method = hm; }
+ */
- /*public void setResponseHeaders(Header[] headers) {
- responseHeaders = headers;
- }*/
+ /*
+ * public void setResponseHeaders(Header[] headers) { responseHeaders = headers;
+ * }
+ */
public void setIsLocalFile(boolean f) {
isLocalFile = f;
}
- /*public void setIsRemoteAccess(boolean r) {
- isRemoteAccess = r;
- }*/
+ /*
+ * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; }
+ */
- /*public void setIsHttpAccess(boolean h) {
- isHttpAccess = h;
- }*/
+ /*
+ * public void setIsHttpAccess(boolean h) { isHttpAccess = h; }
+ */
public void setIsDownloadSupported(boolean d) {
isDownloadSupported = d;
@@ -506,12 +517,11 @@ public void setNoVarHeader(boolean nvh) {
noVarHeader = nvh;
}
- // connection management methods:
- /*public void releaseConnection() {
- if (method != null) {
- method.releaseConnection();
- }
- }*/
+ // connection management methods:
+ /*
+ * public void releaseConnection() { if (method != null) {
+ * method.releaseConnection(); } }
+ */
public void closeInputStream() {
if (in != null) {
@@ -528,7 +538,7 @@ public void closeInputStream() {
}
}
}
-
+
public String generateVariableHeader(List<DataVariable> dvs) {
String varHeader = null;
@@ -571,14 +581,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE
return false;
}
- public boolean isBelowIngestSizeLimit() {
- long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1"));
- if(limit>0 && getSize()>limit) {
- return false;
- } else {
- return true;
- }
- }
+ public boolean isBelowIngestSizeLimit() {
+ long limit = Long.parseLong(getConfigParam(INGEST_SIZE_LIMIT, "-1"));
+ if (limit > 0 && getSize() > limit) {
+ return false;
+ } else {
+ return true;
+ }
+ }
public boolean downloadRedirectEnabled() {
return false;
@@ -587,36 +597,43 @@ public boolean downloadRedirectEnabled() {
public boolean downloadRedirectEnabled(String auxObjectTag) {
return false;
}
-
- public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException {
+
+ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName)
+ throws IOException {
throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type");
}
-
public static boolean isPublicStore(String driverId) {
- //Read once and cache
- if(!driverPublicAccessMap.containsKey(driverId)) {
- driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public")));
+ // Read once and cache
+ if (!driverPublicAccessMap.containsKey(driverId)) {
+ driverPublicAccessMap.put(driverId,
+ Boolean.parseBoolean(getConfigParamForDriver(driverId, PUBLIC)));
}
return driverPublicAccessMap.get(driverId);
}
-
+
public static String getDriverPrefix(String driverId) {
- return driverId+ DataAccess.SEPARATOR;
+ return driverId + DataAccess.SEPARATOR;
}
-
+
public static boolean isDirectUploadEnabled(String driverId) {
- return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) ||
- Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band"));
+ return (getConfigParamForDriver(driverId, TYPE).equals(DataAccess.S3)
+ && Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_REDIRECT)))
+ || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND));
+ }
+
+ // True by default; stores (e.g. RemoteOverlay, Globus) can set this to false to stop attempts to read bytes
+ public static boolean isDataverseAccessible(String driverId) {
+ return (true && !Boolean.parseBoolean(getConfigParamForDriver(driverId, DATAVERSE_INACCESSIBLE)));
}
- //Check that storageIdentifier is consistent with store's config
- //False will prevent direct uploads
- protected static boolean isValidIdentifier(String driverId, String storageId) {
+ // Check that storageIdentifier is consistent with store's config
+ // False will prevent direct uploads
+ static boolean isValidIdentifier(String driverId, String storageId) {
return false;
}
-
- //Utility to verify the standard UUID pattern for stored files.
+
+ // Utility to verify the standard UUID pattern for stored files.
protected static boolean usesStandardNamePattern(String identifier) {
Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$");
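+ // For example, an identifier like "18b39722140-50eb7d3c5ece" (11 hex chars,
+ // a dash, then 12 hex chars) matches this pattern; the value is illustrative.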
@@ -626,4 +643,44 @@ protected static boolean usesStandardNamePattern(String identifier) {
public abstract List<String> cleanUp(Predicate<String> filter, boolean dryRun) throws IOException;
+ /**
+ * A storage-type-specific mechanism for retrieving the size of a file. Intended
+ * primarily as a way to get the size before it has been recorded in the
+ * database, e.g. during direct/out-of-band transfers, but it could also be used
+ * to verify the db values.
+ *
+ * @return file size in bytes
+ * @throws IOException
+ */
+ public abstract long retrieveSizeFromMedia() throws IOException;
+
+
+ /* Convenience methods to get a driver-specific parameter
+ *
+ * - with or without a default
+ * - static or per object
+ *
+ * @param parameterName
+ * @return the parameter value
+ */
+
+ protected String getConfigParam(String parameterName) {
+ return getConfigParam(parameterName, null);
+ }
+
+ protected String getConfigParam(String parameterName, String defaultValue) {
+ return getConfigParamForDriver(this.driverId, parameterName, defaultValue);
+ }
+
+ protected static String getConfigParamForDriver(String driverId, String parameterName) {
+ return getConfigParamForDriver(driverId, parameterName, null);
+ }
+ protected static String getConfigParamForDriver(String driverId, String parameterName, String defaultValue) {
+ return System.getProperty("dataverse.files." + driverId + "." + parameterName, defaultValue);
+ }
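+ // Illustrative example (hypothetical driver id "myS3"): with the JVM options
+ //   -Ddataverse.files.myS3.type=s3
+ //   -Ddataverse.files.myS3.ingestsizelimit=2000000000
+ // getConfigParamForDriver("myS3", "ingestsizelimit", "-1") returns "2000000000",
+ // and an instance whose driverId is "myS3" gets the same value from
+ // getConfigParam(INGEST_SIZE_LIMIT, "-1"), as in isBelowIngestSizeLimit() above.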
+
+ public static String getNewIdentifier(String driverId) {
+ return driverId + DataAccess.SEPARATOR + FileUtil.generateStorageIdentifier();
+ }
+
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
index 6c84009de3e..717f46ffd60 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
@@ -50,6 +50,17 @@ public class SwiftAccessIO extends StorageIO {
private String swiftLocation;
private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO");
+ private static final String IS_PUBLIC_CONTAINER = "isPublicContainer";
+ private static final String FOLDER_PATH_SEPARATOR = "folderPathSeparator";
+ private static final String DEFAULT_ENDPOINT = "defaultEndpoint";
+ private static final String TEMPORARY_URL_EXPIRY_TIME = "temporaryUrlExpiryTime";
+ private static final String AUTH_URL = "authUrl";
+ private static final String USERNAME = "username";
+ private static final String PASSWORD = "password";
+ private static final String TENANT = "tenant";
+ private static final String AUTH_TYPE = "authType";
+ private static final String HASH_KEY = "hashKey";
+ private static final String ENDPOINT = "endpoint";
public SwiftAccessIO() {
//Partially functional StorageIO object - constructor only for testing
@@ -70,10 +81,10 @@ public SwiftAccessIO(String swiftLocation, String driverId) {
}
private void readSettings() {
- isPublicContainer = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".isPublicContainer", "true"));
- swiftFolderPathSeparator = System.getProperty("dataverse.files." + this.driverId + ".folderPathSeparator", "_");
- swiftDefaultEndpoint = System.getProperty("dataverse.files." + this.driverId + ".defaultEndpoint");
- tempUrlExpires = Integer.parseInt(System.getProperty("dataverse.files." + this.driverId + ".temporaryUrlExpiryTime", "60"));
+ isPublicContainer = Boolean.parseBoolean(getConfigParam(IS_PUBLIC_CONTAINER, "true"));
+ swiftFolderPathSeparator = getConfigParam(FOLDER_PATH_SEPARATOR, "_");
+ swiftDefaultEndpoint = getConfigParam(DEFAULT_ENDPOINT);
+ tempUrlExpires = Integer.parseInt(getConfigParam(TEMPORARY_URL_EXPIRY_TIME, "60"));
}
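+ // Illustrative example (hypothetical driver id "swift1" and endpoint "endpoint1"):
+ // these settings map to JVM options of the form
+ //   -Ddataverse.files.swift1.defaultEndpoint=endpoint1
+ //   -Ddataverse.files.swift1.authUrl.endpoint1=<auth URL>
+ // since getConfigParam() prepends the "dataverse.files.<driverId>." prefix.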
@@ -131,7 +142,8 @@ public void open(DataAccessOption... options) throws IOException {
&& dataFile.getContentType().equals("text/tab-separated-values")
&& dataFile.isTabularData()
&& dataFile.getDataTable() != null
- && (!this.noVarHeader())) {
+ && (!this.noVarHeader())
+ && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
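+ // (a variable-name header is generated and prepended on the fly here only when
+ // one is not already stored in the physical tab-delimited file)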
List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
String varHeaderLine = generateVariableHeader(datavariables);
@@ -740,12 +752,12 @@ private StoredObject openSwiftAuxFile(boolean writeAccess, String auxItemTag) th
}
Account authenticateWithSwift(String swiftEndPoint) throws IOException {
- String swiftEndPointAuthUrl = System.getProperty("dataverse.files." + this.driverId + ".authUrl." + swiftEndPoint);
- String swiftEndPointUsername = System.getProperty("dataverse.files." + this.driverId + ".username." + swiftEndPoint);
- String swiftEndPointSecretKey = System.getProperty("dataverse.files." + this.driverId + ".password." + swiftEndPoint);
- String swiftEndPointTenantName = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint);
- String swiftEndPointAuthMethod = System.getProperty("dataverse.files." + this.driverId + ".authType." + swiftEndPoint);
- String swiftEndPointTenantId = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint);
+ String swiftEndPointAuthUrl = getConfigParam(AUTH_URL + "." + swiftEndPoint);
+ String swiftEndPointUsername = getConfigParam(USERNAME + "." + swiftEndPoint);
+ String swiftEndPointSecretKey = getConfigParam(PASSWORD + "." + swiftEndPoint);
+ String swiftEndPointTenantName = getConfigParam(TENANT + "." + swiftEndPoint);
+ String swiftEndPointAuthMethod = getConfigParam(AUTH_TYPE + "." + swiftEndPoint);
+ String swiftEndPointTenantId = getConfigParam(TENANT + "." + swiftEndPoint);
if (swiftEndPointAuthUrl == null || swiftEndPointUsername == null || swiftEndPointSecretKey == null
|| "".equals(swiftEndPointAuthUrl) || "".equals(swiftEndPointUsername) || "".equals(swiftEndPointSecretKey)) {
@@ -814,7 +826,7 @@ private String getSwiftFileURI(StoredObject fileObject) throws IOException {
private String hmac = null;
public String generateTempUrlSignature(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException {
if (hmac == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) {
- String secretKey = System.getProperty("dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint);
+ String secretKey = getConfigParam(HASH_KEY + "." + swiftEndPoint);
if (secretKey == null) {
throw new IOException("Please input a hash key under dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint);
}
@@ -841,7 +853,7 @@ public long generateTempUrlExpiry(int duration, long currentTime) {
private String temporaryUrl = null;
private String generateTemporarySwiftUrl(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException {
- String baseUrl = System.getProperty("dataverse.files." + this.driverId + ".endpoint." + swiftEndPoint);
+ String baseUrl = getConfigParam(ENDPOINT + "." + swiftEndPoint);
String path = "/v1/" + containerName + "/" + objectName;
if (temporaryUrl == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) {
@@ -954,4 +966,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE
}
return toDelete;
}
+
+ @Override
+ public long retrieveSizeFromMedia() throws IOException {
+ throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver.");
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java
index 782f7f3a52d..c369010c8cd 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java
@@ -60,305 +60,26 @@
public class TabularSubsetGenerator implements SubsetGenerator {
- private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName());
+ private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName());
- private static int COLUMN_TYPE_STRING = 1;
- private static int COLUMN_TYPE_LONG = 2;
- private static int COLUMN_TYPE_DOUBLE = 3;
- private static int COLUMN_TYPE_FLOAT = 4;
-
- private static int MAX_COLUMN_BUFFER = 8192;
-
- private FileChannel fileChannel = null;
-
- private int varcount;
- private int casecount;
- private int subsetcount;
-
- private byte[][] columnEntries = null;
-
-
- private ByteBuffer[] columnByteBuffers;
- private int[] columnBufferSizes;
- private int[] columnBufferOffsets;
-
- private long[] columnStartOffsets;
- private long[] columnTotalOffsets;
- private long[] columnTotalLengths;
-
- public TabularSubsetGenerator() {
-
- }
-
- public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException {
- if (!datafile.isTabularData()) {
- throw new IOException("DataFile is not tabular data.");
- }
-
- setVarCount(datafile.getDataTable().getVarQuantity().intValue());
- setCaseCount(datafile.getDataTable().getCaseQuantity().intValue());
-
-
-
- StorageIO dataAccess = datafile.getStorageIO();
- if (!dataAccess.isLocalFile()) {
- throw new IOException("Subsetting is supported on local files only!");
- }
-
- //File tabfile = datafile.getFileSystemLocation().toFile();
- File tabfile = dataAccess.getFileSystemPath().toFile();
+ //private static int MAX_COLUMN_BUFFER = 8192;
- File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount());
- long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount());
-
- fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ));
-
- if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) {
- throw new IOException("Illegal number of variables in the subset request");
- }
-
- subsetcount = variables.size();
- columnTotalOffsets = new long[subsetcount];
- columnTotalLengths = new long[subsetcount];
- columnByteBuffers = new ByteBuffer[subsetcount];
-
-
+ public TabularSubsetGenerator() {
- if (subsetcount == 1) {
- if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) {
- throw new IOException("Variable in the subset request does not belong to the datafile.");
- }
- dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder()));
- fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder()));
- columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder());
- columnTotalOffsets[0] = 0;
- } else {
- columnEntries = new byte[subsetcount][];
-
- columnBufferSizes = new int[subsetcount];
- columnBufferOffsets = new int[subsetcount];
- columnStartOffsets = new long[subsetcount];
-
- int i = 0;
- for (DataVariable var : variables) {
- if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) {
- throw new IOException("Variable in the subset request does not belong to the datafile.");
- }
- columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER);
- columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder());
- columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder());
- if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) {
- columnByteBuffers[i].limit((int)columnTotalLengths[i]);
- }
- fileChannel.position(columnStartOffsets[i]);
- columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]);
- columnBufferOffsets[i] = 0;
- columnTotalOffsets[i] = columnBufferSizes[i];
- i++;
- }
- }
- }
-
- private int getVarCount() {
- return varcount;
}
- private void setVarCount(int varcount) {
- this.varcount = varcount;
- }
-
- private int getCaseCount() {
- return casecount;
- }
-
- private void setCaseCount(int casecount) {
- this.casecount = casecount;
- }
-
-
- /*
- * Note that this method operates on the *absolute* column number, i.e.
- * the number of the physical column in the tabular file. This is stored
- * in DataVariable.FileOrder.
- * This "column number" should not be confused with the number of column
- * in the subset request; a user can request any number of variable
- * columns, in an order that doesn't have to follow the physical order
- * of the columns in the file.
- */
- private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException {
- if (columnEndOffsets == null || columnEndOffsets.length <= column) {
- throw new IOException("Offsets table not initialized; or column out of bounds.");
- }
- long columnOffset;
-
- if (column > 0) {
- columnOffset = columnEndOffsets[column - 1];
- } else {
- columnOffset = getVarCount() * 8;
- }
- return columnOffset;
- }
-
- /*
- * See the comment for the method above.
+ /**
+ * This class used to be much more complex. There were methods for subsetting
+ * from fixed-width field files, including methods that used the optimized,
+ * "90 deg. rotated" versions of such files (i.e., a *column-wise* copy of the
+ * data file in which the columns are stored sequentially, preceded by a table
+ * of byte offsets of each column). Individual variable columns could then be
+ * read cheaply, at the expense of doubling the storage size of the tabular
+ * data files. These methods were not used, so they were deleted in Jan. 2024,
+ * prior to 6.2.
+ * Please consult the git history if you are interested in looking at that code.
*/
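+ // (For reference, the deleted "rotated image" layout was, roughly: a header of
+ // varcount * 8 bytes holding the byte offset of the end of each column, followed
+ // by the newline-separated values of column 0, column 1, etc.; see the git
+ // history for the full implementation.)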
- private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException {
- if (columnEndOffsets == null || columnEndOffsets.length <= column) {
- throw new IOException("Offsets table not initialized; or column out of bounds.");
- }
- long columnLength;
-
- if (column > 0) {
- columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1];
- } else {
- columnLength = columnEndOffsets[0] - varcount * 8;
- }
-
- return columnLength;
- }
-
-
- private void bufferMoreColumnBytes(int column) throws IOException {
- if (columnTotalOffsets[column] >= columnTotalLengths[column]) {
- throw new IOException("attempt to buffer bytes past the column boundary");
- }
- fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]);
-
- columnByteBuffers[column].clear();
- if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) {
- dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes");
- columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column]));
- }
- columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]);
- dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column);
- columnBufferOffsets[column] = 0;
- columnTotalOffsets[column] += columnBufferSizes[column];
- }
-
- public byte[] readColumnEntryBytes(int column) {
- return readColumnEntryBytes(column, true);
- }
-
-
- public byte[] readColumnEntryBytes(int column, boolean addTabs) {
- byte[] leftover = null;
- byte[] ret = null;
-
- if (columnBufferOffsets[column] >= columnBufferSizes[column]) {
- try {
- bufferMoreColumnBytes(column);
- if (columnBufferSizes[column] < 1) {
- return null;
- }
- } catch (IOException ioe) {
- return null;
- }
- }
-
- int byteindex = columnBufferOffsets[column];
- try {
- while (columnByteBuffers[column].array()[byteindex] != '\n') {
- byteindex++;
- if (byteindex == columnBufferSizes[column]) {
- // save the leftover:
- if (leftover == null) {
- leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]];
- System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]);
- } else {
- byte[] merged = new byte[leftover.length + columnBufferSizes[column]];
-
- System.arraycopy(leftover, 0, merged, 0, leftover.length);
- System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]);
- leftover = merged;
- merged = null;
- }
- // read more bytes:
- bufferMoreColumnBytes(column);
- if (columnBufferSizes[column] < 1) {
- return null;
- }
- byteindex = 0;
- }
- }
-
- // presumably, we have found our '\n':
- if (leftover == null) {
- ret = new byte[byteindex - columnBufferOffsets[column] + 1];
- System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1);
- } else {
- ret = new byte[leftover.length + byteindex + 1];
- System.arraycopy(leftover, 0, ret, 0, leftover.length);
- System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1);
- }
-
- } catch (IOException ioe) {
- return null;
- }
-
- columnBufferOffsets[column] = (byteindex + 1);
-
- if (column < columnBufferOffsets.length - 1) {
- ret[ret.length - 1] = '\t';
- }
- return ret;
- }
-
- public int readSingleColumnSubset(byte[] buffer) throws IOException {
- if (columnTotalOffsets[0] == columnTotalLengths[0]) {
- return -1;
- }
-
- if (columnByteBuffers[0] == null) {
- dbgLog.fine("allocating single column subset buffer.");
- columnByteBuffers[0] = ByteBuffer.allocate(buffer.length);
- }
-
- int bytesread = fileChannel.read(columnByteBuffers[0]);
- dbgLog.fine("single column subset: read "+bytesread+" bytes.");
- if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) {
- bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]);
- }
- System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread);
-
- columnTotalOffsets[0] += bytesread;
- columnByteBuffers[0].clear();
- return bytesread > 0 ? bytesread : -1;
- }
-
-
- public byte[] readSubsetLineBytes() throws IOException {
- byte[] ret = null;
- int total = 0;
- for (int i = 0; i < subsetcount; i++) {
- columnEntries[i] = readColumnEntryBytes(i);
- if (columnEntries[i] == null) {
- throw new IOException("Failed to read subset line entry");
- }
- total += columnEntries[i].length;
- }
-
- ret = new byte[total];
- int offset = 0;
- for (int i = 0; i < subsetcount; i++) {
- System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length);
- offset += columnEntries[i].length;
- }
- dbgLog.fine("line: "+new String(ret));
- return ret;
- }
-
-
- public void close() {
- if (fileChannel != null) {
- try {
- fileChannel.close();
- } catch (IOException ioe) {
- // don't care.
- }
- }
- }
-
public void subsetFile(String infile, String outfile, List<Integer> columns, Long numCases) {
subsetFile(infile, outfile, columns, numCases, "\t");
}
@@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo
* files, OK to use on small files:
*/
- public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) {
+ public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) {
Double[] retVector = new Double[numCases];
try (Scanner scanner = new Scanner(in)) {
scanner.useDelimiter("\\n");
+ if (skipHeader) {
+ skipFirstLine(scanner);
+ }
+
for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
if (scanner.hasNext()) {
String[] line = (scanner.next()).split("\t", -1);
@@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas
* Same deal as with the method above - straightforward, but (potentially) slow.
* Not a resource hog though - will only try to store one vector in memory.
*/
- public static Float[] subsetFloatVector(InputStream in, int column, int numCases) {
+ public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) {
Float[] retVector = new Float[numCases];
try (Scanner scanner = new Scanner(in)) {
scanner.useDelimiter("\\n");
+ if (skipHeader) {
+ skipFirstLine(scanner);
+ }
+
for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
if (scanner.hasNext()) {
String[] line = (scanner.next()).split("\t", -1);
@@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases
* Same deal as with the method above - straightforward, but (potentially) slow.
* Not a resource hog though - will only try to store one vector in memory.
*/
- public static Long[] subsetLongVector(InputStream in, int column, int numCases) {
+ public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) {
Long[] retVector = new Long[numCases];
try (Scanner scanner = new Scanner(in)) {
scanner.useDelimiter("\\n");
+ if (skipHeader) {
+ skipFirstLine(scanner);
+ }
+
for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
if (scanner.hasNext()) {
String[] line = (scanner.next()).split("\t", -1);
@@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases)
* Same deal as with the method above - straightforward, but (potentially) slow.
* Not a resource hog though - will only try to store one vector in memory.
*/
- public static String[] subsetStringVector(InputStream in, int column, int numCases) {
+ public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) {
String[] retVector = new String[numCases];
try (Scanner scanner = new Scanner(in)) {
scanner.useDelimiter("\\n");
+ if (skipHeader) {
+ skipFirstLine(scanner);
+ }
+
for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
if (scanner.hasNext()) {
String[] line = (scanner.next()).split("\t", -1);
@@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas
}
- /*
- * Straightforward method for subsetting a tab-delimited data file, extracting
- * all the columns representing continuous variables and returning them as
- * a 2-dimensional array of Doubles;
- * Inefficient on large files, OK to use on small ones.
- */
- public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException {
- Double[][] retVector = new Double[columns.size()][numCases];
- try (Scanner scanner = new Scanner(in)) {
- scanner.useDelimiter("\\n");
-
- for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
- if (scanner.hasNext()) {
- String[] line = (scanner.next()).split("\t", -1);
- int j = 0;
- for (Integer i : columns) {
- try {
- // TODO: verify that NaN and +-Inf are going to be
- // handled correctly here! -- L.A.
- // NO, "+-Inf" is not handled correctly; see the
- // comment further down below.
- retVector[j][caseIndex] = new Double(line[i]);
- } catch (NumberFormatException ex) {
- retVector[j][caseIndex] = null; // missing value
- }
- j++;
- }
- } else {
- throw new IOException("Tab file has fewer rows than the stored number of cases!");
- }
- }
-
- int tailIndex = numCases;
- while (scanner.hasNext()) {
- String nextLine = scanner.next();
- if (!"".equals(nextLine)) {
- throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine);
- }
- tailIndex++;
- }
-
- }
- return retVector;
-
- }
-
- public String[] subsetStringVector(DataFile datafile, int column) throws IOException {
- return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING);
- }
-
- public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException {
- return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE);
- }
-
- public Long[] subsetLongVector(DataFile datafile, int column) throws IOException {
- return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG);
- }
-
- // Float methods are temporary;
- // In normal operations we'll be treating all the floating point types as
- // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest
- // tests. -- L.A.
-
- public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException {
- return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT);
- }
-
- public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException {
- return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING);
- }
-
- public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException {
- return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE);
- }
-
- public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException {
- return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG);
- }
-
- public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException {
- return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT);
- }
-
- public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException {
- if (!dataFile.isTabularData()) {
- throw new IOException("DataFile is not tabular data.");
- }
-
- int varcount = dataFile.getDataTable().getVarQuantity().intValue();
- int casecount = dataFile.getDataTable().getCaseQuantity().intValue();
-
- if (column >= varcount) {
- throw new IOException("Column "+column+" is out of bounds.");
- }
-
- StorageIO dataAccess = dataFile.getStorageIO();
- if (!dataAccess.isLocalFile()) {
- throw new IOException("Subsetting is supported on local files only!");
- }
-
- //File tabfile = datafile.getFileSystemLocation().toFile();
- File tabfile = dataAccess.getFileSystemPath().toFile();
-
- if (columntype == COLUMN_TYPE_STRING) {
- String filename = dataFile.getFileMetadata().getLabel();
- if (filename != null) {
- filename = filename.replaceFirst("^_", "");
- Integer fnumvalue = null;
- try {
- fnumvalue = new Integer(filename);
- } catch (Exception ex){
- fnumvalue = null;
- }
- if (fnumvalue != null) {
- //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) {
- if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) {
- if (!(fnumvalue.intValue() == 60007
- || fnumvalue.intValue() == 59997
- || fnumvalue.intValue() == 60015
- || fnumvalue.intValue() == 59948
- || fnumvalue.intValue() == 60012
- || fnumvalue.intValue() == 52585
- || fnumvalue.intValue() == 60005
- || fnumvalue.intValue() == 60002
- || fnumvalue.intValue() == 59954
- || fnumvalue.intValue() == 60008
- || fnumvalue.intValue() == 54972
- || fnumvalue.intValue() == 55010
- || fnumvalue.intValue() == 54996
- || fnumvalue.intValue() == 53527
- || fnumvalue.intValue() == 53546
- || fnumvalue.intValue() == 55002
- || fnumvalue.intValue() == 55006
- || fnumvalue.intValue() == 54998
- || fnumvalue.intValue() == 52552
- // SPSS/SAV cases with similar issue - compat mode must be disabled
- //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16...
- || fnumvalue.intValue() == 54618 // another SAV file, with long strings...
- || fnumvalue.intValue() == 54619 // [same]
- || fnumvalue.intValue() == 57983
- || fnumvalue.intValue() == 58262
- || fnumvalue.intValue() == 58288
- || fnumvalue.intValue() == 58656
- || fnumvalue.intValue() == 59144
- // || fnumvalue.intValue() == 69626 [nope!]
- )) {
- dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;");
- return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true);
- }
- }
- }
- }
+ private static void skipFirstLine(Scanner scanner) {
+ if (!scanner.hasNext()) {
+ throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!");
}
-
- return subsetObjectVector(tabfile, column, varcount, casecount, columntype);
- }
-
- public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException {
- return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false);
- }
-
-
-
- public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException {
-
- Object[] retVector = null;
-
- boolean isString = false;
- boolean isDouble = false;
- boolean isLong = false;
- boolean isFloat = false;
-
- //Locale loc = new Locale("en", "US");
-
- if (columntype == COLUMN_TYPE_STRING) {
- isString = true;
- retVector = new String[casecount];
- } else if (columntype == COLUMN_TYPE_DOUBLE) {
- isDouble = true;
- retVector = new Double[casecount];
- } else if (columntype == COLUMN_TYPE_LONG) {
- isLong = true;
- retVector = new Long[casecount];
- } else if (columntype == COLUMN_TYPE_FLOAT){
- isFloat = true;
- retVector = new Float[casecount];
- } else {
- throw new IOException("Unsupported column type: "+columntype);
- }
-
- File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount);
- long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount);
- long columnOffset = 0;
- long columnLength = 0;
-
- if (column > 0) {
- columnOffset = columnEndOffsets[column - 1];
- columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1];
- } else {
- columnOffset = varcount * 8;
- columnLength = columnEndOffsets[0] - varcount * 8;
- }
- int caseindex = 0;
-
- try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()),
- StandardOpenOption.READ))) {
- fc.position(columnOffset);
- int MAX_COLUMN_BUFFER = 8192;
-
- ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER);
-
- if (columnLength < MAX_COLUMN_BUFFER) {
- in.limit((int) (columnLength));
- }
-
- long bytesRead = 0;
- long bytesReadTotal = 0;
-
- int byteoffset = 0;
- byte[] leftover = null;
-
- while (bytesReadTotal < columnLength) {
- bytesRead = fc.read(in);
- byte[] columnBytes = in.array();
- int bytecount = 0;
-
- while (bytecount < bytesRead) {
- if (columnBytes[bytecount] == '\n') {
- /*
- String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8");
-
- if (leftover != null) {
- String leftoverString = new String (leftover, "UTF8");
- token = leftoverString + token;
- leftover = null;
- }
- */
- /*
- * Note that the way I was doing it at first - above -
- * was not quite the correct way - because I was creating UTF8
- * strings from the leftover bytes, and the bytes in the
- * current buffer *separately*; which means, if a multi-byte
- * UTF8 character got split in the middle between one buffer
- * and the next, both chunks of it would become junk
- * characters, on each side!
- * The correct way of doing it, of course, is to create a
- * merged byte buffer, and then turn it into a UTF8 string.
- * -- L.A. 4.0
- */
- String token = null;
-
- if (leftover == null) {
- token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8");
- } else {
- byte[] merged = new byte[leftover.length + bytecount - byteoffset];
-
- System.arraycopy(leftover, 0, merged, 0, leftover.length);
- System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset);
- token = new String(merged, "UTF8");
- leftover = null;
- merged = null;
- }
-
- if (isString) {
- if ("".equals(token)) {
- // An empty string is a string missing value!
- // An empty string in quotes is an empty string!
- retVector[caseindex] = null;
- } else {
- // Strip the outer quotes:
- token = token.replaceFirst("^\\\"", "");
- token = token.replaceFirst("\\\"$", "");
-
- // We need to restore the special characters that
- // are stored in tab files escaped - quotes, new lines
- // and tabs. Before we do that however, we need to
- // take care of any escaped backslashes stored in
- // the tab file. I.e., "foo\t" should be transformed
- // to "foo"; but "foo\\t" should be transformed
- // to "foo\t". This way new lines and tabs that were
- // already escaped in the original data are not
- // going to be transformed to unescaped tab and
- // new line characters!
-
- String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2);
-
- // (note that it's important to use the 2-argument version
- // of String.split(), and set the limit argument to a
- // negative value; otherwise any trailing backslashes
- // are lost.)
-
- for (int i = 0; i < splitTokens.length; i++) {
- splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\"");
- splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t");
- splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n");
- splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r");
- }
- // TODO:
- // Make (some of?) the above optional; for ex., we
- // do need to restore the newlines when calculating UNFs;
- // But if we are subsetting these vectors in order to
- // create a new tab-delimited file, they will
- // actually break things! -- L.A. Jul. 28 2014
-
- token = StringUtils.join(splitTokens, '\\');
-
- // "compatibility mode" - a hack, to be able to produce
- // unfs identical to those produced by the "early"
- // unf5 jar; will be removed in production 4.0.
- // -- L.A. (TODO: ...)
- if (compatmode && !"".equals(token)) {
- if (token.length() > 128) {
- if ("".equals(token.trim())) {
- // don't ask...
- token = token.substring(0, 129);
- } else {
- token = token.substring(0, 128);
- // token = String.format(loc, "%.128s", token);
- token = token.trim();
- // dbgLog.info("formatted and trimmed: "+token);
- }
- } else {
- if ("".equals(token.trim())) {
- // again, don't ask;
- // - this replicates some bugginness
- // that happens inside unf5;
- token = "null";
- } else {
- token = token.trim();
- }
- }
- }
-
- retVector[caseindex] = token;
- }
- } else if (isDouble) {
- try {
- // TODO: verify that NaN and +-Inf are
- // handled correctly here! -- L.A.
- // Verified: new Double("nan") works correctly,
- // resulting in Double.NaN;
- // Double("[+-]Inf") doesn't work however;
- // (the constructor appears to be expecting it
- // to be spelled as "Infinity", "-Infinity", etc.
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) {
- retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY;
- } else if ("-inf".equalsIgnoreCase(token)) {
- retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY;
- } else if (token == null || token.equals("")) {
- // missing value:
- retVector[caseindex] = null;
- } else {
- retVector[caseindex] = new Double(token);
- }
- } catch (NumberFormatException ex) {
- dbgLog.warning("NumberFormatException thrown for " + token + " as Double");
-
- retVector[caseindex] = null; // missing value
- // TODO: ?
- }
- } else if (isLong) {
- try {
- retVector[caseindex] = new Long(token);
- } catch (NumberFormatException ex) {
- retVector[caseindex] = null; // assume missing value
- }
- } else if (isFloat) {
- try {
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) {
- retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY;
- } else if ("-inf".equalsIgnoreCase(token)) {
- retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY;
- } else if (token == null || token.equals("")) {
- // missing value:
- retVector[caseindex] = null;
- } else {
- retVector[caseindex] = new Float(token);
- }
- } catch (NumberFormatException ex) {
- dbgLog.warning("NumberFormatException thrown for " + token + " as Float");
- retVector[caseindex] = null; // assume missing value (TODO: ?)
- }
- }
- caseindex++;
-
- if (bytecount == bytesRead - 1) {
- byteoffset = 0;
- } else {
- byteoffset = bytecount + 1;
- }
- } else {
- if (bytecount == bytesRead - 1) {
- // We've reached the end of the buffer;
- // This means we'll save whatever unused bytes left in
- // it - i.e., the bytes between the last new line
- // encountered and the end - in the leftover buffer.
-
- // *EXCEPT*, there may be a case of a very long String
- // that is actually longer than MAX_COLUMN_BUFFER, in
- // which case it is possible that we've read through
- // an entire buffer of bytes without finding any
- // new lines... in this case we may need to add this
- // entire byte buffer to an already existing leftover
- // buffer!
- if (leftover == null) {
- leftover = new byte[(int) bytesRead - byteoffset];
- System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset);
- } else {
- if (byteoffset != 0) {
- throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!");
- }
- byte[] merged = new byte[leftover.length + (int) bytesRead];
-
- System.arraycopy(leftover, 0, merged, 0, leftover.length);
- System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead);
- // leftover = null;
- leftover = merged;
- merged = null;
- }
- byteoffset = 0;
-
- }
- }
- bytecount++;
- }
-
- bytesReadTotal += bytesRead;
- in.clear();
- if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) {
- in.limit((int) (columnLength - bytesReadTotal));
- }
- }
-
- }
-
- if (caseindex != casecount) {
- throw new IOException("Faile to read "+casecount+" tokens for column "+column);
- //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+".");
- }
-
- return retVector;
- }
-
- private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException {
- long[] byteOffsets = new long[varcount];
-
- try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) {
-
- byte[] offsetHeader = new byte[varcount * 8];
-
- int readlen = rotfileStream.read(offsetHeader);
-
- if (readlen != varcount * 8) {
- throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file.");
- }
-
- for (int varindex = 0; varindex < varcount; varindex++) {
- byte[] offsetBytes = new byte[8];
- System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8);
-
- ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes);
- byteOffsets[varindex] = offsetByteBuffer.getLong();
-
- // System.out.println(byteOffsets[varindex]);
- }
-
- }
-
- return byteOffsets;
- }
-
- private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException {
- String fileName = tabfile.getAbsolutePath();
- String rotatedImageFileName = fileName + ".90d";
- File rotatedImageFile = new File(rotatedImageFileName);
- if (rotatedImageFile.exists()) {
- //System.out.println("Image already exists!");
- return rotatedImageFile;
- }
-
- return generateRotatedImage(tabfile, varcount, casecount);
-
- }
-
- private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException {
- // TODO: throw exceptions if bad file, zero varcount, etc. ...
-
- String fileName = tabfile.getAbsolutePath();
- String rotatedImageFileName = fileName + ".90d";
-
- int MAX_OUTPUT_STREAMS = 32;
- int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now?
- int MAX_COLUMN_BUFFER = 8 * 1024;
-
- // offsetHeader will contain the byte offsets of the individual column
- // vectors in the final rotated image file
- byte[] offsetHeader = new byte[varcount * 8];
- int[] bufferedSizes = new int[varcount];
- long[] cachedfileSizes = new long[varcount];
- File[] columnTempFiles = new File[varcount];
-
- for (int i = 0; i < varcount; i++) {
- bufferedSizes[i] = 0;
- cachedfileSizes[i] = 0;
- }
-
- // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is
- // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?)
-
- byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER];
-
- // read the tab-delimited file:
-
- try (FileInputStream tabfileStream = new FileInputStream(tabfile);
- Scanner scanner = new Scanner(tabfileStream)) {
- scanner.useDelimiter("\\n");
-
- for (int caseindex = 0; caseindex < casecount; caseindex++) {
- if (scanner.hasNext()) {
- String[] line = (scanner.next()).split("\t", -1);
- // TODO: throw an exception if there are fewer tab-delimited
- // tokens than the number of variables specified.
- String token = "";
- int tokensize = 0;
- for (int varindex = 0; varindex < varcount; varindex++) {
- // TODO: figure out the safest way to convert strings to
- // bytes here. Is it going to be safer to use getBytes("UTF8")?
- // we are already making the assumption that the values
- // in the tab file are in UTF8. -- L.A.
- token = line[varindex] + "\n";
- tokensize = token.getBytes().length;
- if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) {
- // fill the buffer and dump its contents into the temp file:
- // (do note that there may be *several* MAX_COLUMN_BUFFERs
- // worth of bytes in the token!)
-
- int tokenoffset = 0;
-
- if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) {
- tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex];
- System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset);
- } // (otherwise the buffer is already full, and we should
- // simply dump it into the temp file, without adding any
- // extra bytes to it)
-
- File bufferTempFile = columnTempFiles[varindex];
- if (bufferTempFile == null) {
- bufferTempFile = File.createTempFile("columnBufferFile", "bytes");
- columnTempFiles[varindex] = bufferTempFile;
- }
-
- // *append* the contents of the buffer to the end of the
- // temp file, if already exists:
- try (BufferedOutputStream outputStream = new BufferedOutputStream(
- new FileOutputStream(bufferTempFile, true))) {
- outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER);
- cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;
-
- // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into
- // the temp file, for as long as there's more than MAX_COLUMN_BUFFER
- // bytes left in the token:
-
- while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) {
- outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER);
- cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;
- tokenoffset += MAX_COLUMN_BUFFER;
- }
-
- }
-
- // buffer the remaining bytes and reset the buffered
- // byte counter:
-
- System.arraycopy(token.getBytes(),
- tokenoffset,
- bufferedColumns[varindex],
- 0,
- tokensize - tokenoffset);
-
- bufferedSizes[varindex] = tokensize - tokenoffset;
-
- } else {
- // continue buffering
- System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize);
- bufferedSizes[varindex] += tokensize;
- }
- }
- } else {
- throw new IOException("Tab file has fewer rows than the stored number of cases!");
- }
- }
- }
-
- // OK, we've created the individual byte vectors of the tab file columns;
- // they may be partially saved in temp files and/or in memory.
- // We now need to go through all these buffers and create the final
- // rotated image file.
-
- try (BufferedOutputStream finalOut = new BufferedOutputStream(
- new FileOutputStream(new File(rotatedImageFileName)))) {
-
- // but first we should create the offset header and write it out into
- // the final file; because it should be at the head, doh!
-
- long columnOffset = varcount * 8;
- // (this is the offset of the first column vector; it is equal to the
- // size of the offset header, i.e. varcount * 8 bytes)
-
- for (int varindex = 0; varindex < varcount; varindex++) {
- long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex];
- columnOffset += totalColumnBytes;
- // totalColumnBytes;
- byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array();
- System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8);
- }
-
- finalOut.write(offsetHeader, 0, varcount * 8);
-
- for (int varindex = 0; varindex < varcount; varindex++) {
- long cachedBytesRead = 0;
-
- // check if there is a cached temp file:
-
- File cachedTempFile = columnTempFiles[varindex];
- if (cachedTempFile != null) {
- byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER];
- try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) {
- int readlen = 0;
- while ((readlen = cachedIn.read(cachedBytes)) > -1) {
- finalOut.write(cachedBytes, 0, readlen);
- cachedBytesRead += readlen;
- }
- }
-
- // delete the temp file:
- cachedTempFile.delete();
-
- }
-
- if (cachedBytesRead != cachedfileSizes[varindex]) {
- throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+
- cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read.");
- }
-
- // then check if there are any bytes buffered for this column:
-
- if (bufferedSizes[varindex] > 0) {
- finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]);
- }
-
- }
- }
-
- return new File(rotatedImageFileName);
-
- }
-
- /*
- * Test method for taking a "rotated" image, and reversing it, reassembling
- * all the columns in the original order. Which should result in a file
- * byte-for-byte identical file to the original tab-delimited version.
- *
- * (do note that this method is not efficiently implemented; it's only
- * being used for experiments so far, to confirm the accuracy of the
- * accuracy of generateRotatedImage(). It should not be used for any
- * practical means in the application!)
- */
- private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException {
- // open the file, read in the offset header:
- try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) {
- byte[] offsetHeader = new byte[varcount * 8];
- long[] byteOffsets = new long[varcount];
-
- int readlen = rotfileStream.read(offsetHeader);
-
- if (readlen != varcount * 8) {
- throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file.");
- }
-
- for (int varindex = 0; varindex < varcount; varindex++) {
- byte[] offsetBytes = new byte[8];
- System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8);
-
- ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes);
- byteOffsets[varindex] = offsetByteBuffer.getLong();
-
- //System.out.println(byteOffsets[varindex]);
- }
-
- String [][] reversedMatrix = new String[casecount][varcount];
-
- long offset = varcount * 8;
- byte[] columnBytes;
-
- for (int varindex = 0; varindex < varcount; varindex++) {
- long columnLength = byteOffsets[varindex] - offset;
-
-
-
- columnBytes = new byte[(int)columnLength];
- readlen = rotfileStream.read(columnBytes);
-
- if (readlen != columnLength) {
- throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex);
- }
- /*
- String columnString = new String(columnBytes);
- //System.out.print(columnString);
- String[] values = columnString.split("\n", -1);
-
- if (values.length < casecount) {
- throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex);
- }
-
- for (int caseindex = 0; caseindex < casecount; caseindex++) {
- reversedMatrix[caseindex][varindex] = values[caseindex];
- }*/
-
- int bytecount = 0;
- int byteoffset = 0;
- int caseindex = 0;
- //System.out.println("generating value vector for column "+varindex);
- while (bytecount < columnLength) {
- if (columnBytes[bytecount] == '\n') {
- String token = new String(columnBytes, byteoffset, bytecount-byteoffset);
- reversedMatrix[caseindex++][varindex] = token;
- byteoffset = bytecount + 1;
- }
- bytecount++;
- }
-
- if (caseindex != casecount) {
- throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex);
- }
- offset = byteOffsets[varindex];
- }
-
- for (int caseindex = 0; caseindex < casecount; caseindex++) {
- for (int varindex = 0; varindex < varcount; varindex++) {
- System.out.print(reversedMatrix[caseindex][varindex]);
- if (varindex < varcount-1) {
- System.out.print("\t");
- } else {
- System.out.print("\n");
- }
- }
- }
-
- }
-
-
- }
-
- /**
- * main() method, for testing
- * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type
- * make sure the CLASSPATH contains ...
- *
- */
-
- public static void main(String[] args) {
-
- String tabFileName = args[0];
- int varcount = new Integer(args[1]).intValue();
- int casecount = new Integer(args[2]).intValue();
- int column = new Integer(args[3]).intValue();
- String type = args[4];
-
- File tabFile = new File(tabFileName);
- File rotatedImageFile = null;
-
- TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator();
-
- /*
- try {
- rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount);
- } catch (IOException ex) {
- System.out.println(ex.getMessage());
- }
- */
-
- //System.out.println("\nFinished generating \"rotated\" column image file.");
-
- //System.out.println("\nOffsets:");
-
- MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN);
- String FORMAT_IEEE754 = "%+#.15e";
-
- try {
- //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount);
- //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount);
- if ("string".equals(type)) {
- String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount);
- for (int i = 0; i < casecount; i++) {
- System.out.println(columns[i]);
- }
- } else {
-
- Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount);
- for (int i = 0; i < casecount; i++) {
- if (columns[i] != null) {
- BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext);
- System.out.println(String.format(FORMAT_IEEE754, outBigDecimal));
- } else {
- System.out.println("NA");
- }
- //System.out.println(columns[i]);
- }
- }
- } catch (IOException ex) {
- System.out.println(ex.getMessage());
- }
- }
-}
-
-
+ scanner.next();
+ }
+}
\ No newline at end of file
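For reference, the "rotated" column image that the removed generateRotatedImage()/reverseRotatedImage() pair produced and consumed has a simple layout: a header of varcount 8-byte big-endian longs, where entry i holds the offset of the first byte past column i's vector, followed by the newline-delimited column vectors back to back. A minimal header reader under those assumptions (the class below is illustrative only, not code from this patch):

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

public class RotatedImageHeaderReader {
    public static void main(String[] args) throws IOException {
        String rotatedFileName = args[0];
        int varcount = Integer.parseInt(args[1]);

        try (DataInputStream in = new DataInputStream(new FileInputStream(rotatedFileName))) {
            long previousEnd = varcount * 8L; // the first column starts right after the offset header
            for (int varindex = 0; varindex < varcount; varindex++) {
                long columnEnd = in.readLong(); // big-endian, as written by ByteBuffer.putLong()
                System.out.println("column " + varindex + ": " + (columnEnd - previousEnd) + " bytes");
                previousEnd = columnEnd;
            }
        }
    }
}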
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java
deleted file mode 100644
index 89e033353c1..00000000000
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package edu.harvard.iq.dataverse.dataaccess;
-
-import edu.harvard.iq.dataverse.DataFile;
-import edu.harvard.iq.dataverse.datavariable.DataVariable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.logging.Logger;
-
-/**
- *
- * @author Leonid Andreev
- */
-public class TabularSubsetInputStream extends InputStream {
- private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName());
-
- private TabularSubsetGenerator subsetGenerator = null;
- private int numberOfSubsetVariables;
- private int numberOfObservations;
- private int numberOfObservationsRead = 0;
- private byte[] leftoverBytes = null;
-
- public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException {
- if (datafile == null) {
- throw new IOException("Null datafile in subset request");
- }
- if (!datafile.isTabularData()) {
- throw new IOException("Subset requested on a non-tabular data file");
- }
- numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue();
-
- if (variables == null || variables.size() < 1) {
- throw new IOException("Null or empty list of variables in subset request.");
- }
- numberOfSubsetVariables = variables.size();
- subsetGenerator = new TabularSubsetGenerator(datafile, variables);
-
- }
-
- //@Override
- public int read() throws IOException {
- throw new IOException("read() method not implemented; do not use.");
- }
-
- //@Override
- public int read(byte[] b) throws IOException {
- // TODO:
- // Move this code into TabularSubsetGenerator
- logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;");
-
- if (numberOfSubsetVariables == 1) {
- logger.fine("calling the single variable subset read method");
- return subsetGenerator.readSingleColumnSubset(b);
- }
-
- int bytesread = 0;
- byte [] linebuffer;
-
- // do we have a leftover?
- if (leftoverBytes != null) {
- if (leftoverBytes.length < b.length) {
- System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length);
- bytesread = leftoverBytes.length;
- leftoverBytes = null;
-
- } else {
- // shouldn't really happen... unless it's a very large subset,
- // or a very long string, etc.
- System.arraycopy(leftoverBytes, 0, b, 0, b.length);
- byte[] tmp = new byte[leftoverBytes.length - b.length];
- System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length);
- leftoverBytes = tmp;
- tmp = null;
- return b.length;
- }
- }
-
- while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) {
- linebuffer = subsetGenerator.readSubsetLineBytes();
- numberOfObservationsRead++;
-
- if (bytesread + linebuffer.length < b.length) {
- // copy linebuffer into the return buffer:
- System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length);
- bytesread += linebuffer.length;
- } else {
- System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread);
- // save the leftover;
- if (bytesread + linebuffer.length > b.length) {
- leftoverBytes = new byte[bytesread + linebuffer.length - b.length];
- System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length);
- }
- return b.length;
- }
- }
-
- // and this means we've reached the end of the tab file!
-
- return bytesread > 0 ? bytesread : -1;
- }
-
- //@Override
- public void close() {
- if (subsetGenerator != null) {
- subsetGenerator.close();
- }
- }
-}
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
index 096f1f87acc..03a0044a987 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java
@@ -411,6 +411,69 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) {
return nonDefaultDatasetThumbnail;
}
}
+
+ public static InputStream getLogoAsInputStream(Dataset dataset) {
+ if (dataset == null) {
+ return null;
+ }
+ StorageIO<Dataset> dataAccess = null;
+
+ try {
+ dataAccess = DataAccess.getStorageIO(dataset);
+ } catch (IOException ioex) {
+ logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()
+ + " (" + ioex.getMessage() + ")");
+ }
+
+ InputStream in = null;
+ try {
+ if (dataAccess == null) {
+ logger.warning(
+ "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier());
+ } else {
+ in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal);
+ }
+ } catch (IOException ex) {
+ logger.fine(
+ "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail.");
+ }
+
+ if (in == null) {
+ DataFile thumbnailFile = dataset.getThumbnailFile();
+
+ if (thumbnailFile == null) {
+ if (dataset.isUseGenericThumbnail()) {
+ logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'.");
+ return null;
+ } else {
+ thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null);
+ if (thumbnailFile == null) {
+ logger.fine("Dataset (id :" + dataset.getId()
+ + ") does not have a logo available that could be selected automatically.");
+ return null;
+ }
+ }
+ }
+ if (thumbnailFile.isRestricted()) {
+ logger.fine("Dataset (id :" + dataset.getId()
+ + ") has a logo the user selected but the file must have later been restricted. Returning null.");
+ return null;
+ }
+
+ try {
+ in = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(),
+ ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE).getInputStream();
+ } catch (IOException ioex) {
+ logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier()
+ + " (" + ioex.getMessage() + ")");
+ ioex.printStackTrace();
+ }
+
+ }
+ return in;
+ }
/**
* The dataset logo is the file that a user uploads which is *not* one of
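The new getLogoAsInputStream() above mirrors getThumbnailAsInputStream(): it tries the dataset-level logo aux file first, then the designated (or auto-selected) thumbnail DataFile, and returns null if nothing usable is found or the selected file is restricted. A hedged usage sketch showing that the caller owns, and must close, the returned stream (the wrapper class is illustrative only):

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.dataset.DatasetUtil;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public final class DatasetLogoStreamer {
    private DatasetLogoStreamer() {}

    // Copies the dataset logo to the given output stream; returns false when no logo is available.
    public static boolean streamLogo(Dataset dataset, OutputStream out) throws IOException {
        InputStream logo = DatasetUtil.getLogoAsInputStream(dataset);
        if (logo == null) {
            return false; // no logo/thumbnail, or the selected thumbnail file is restricted
        }
        try (InputStream in = logo) {
            in.transferTo(out); // stream the bytes through, then close the source
        }
        return true;
    }
}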
diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
index d44388f39f7..0143fced87c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
@@ -61,6 +61,7 @@
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
import org.apache.commons.io.IOUtils;
import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDataFilesCommand;
+import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
/**
@@ -1212,9 +1213,9 @@ private boolean step_030_createNewFilesViaIngest(){
this.newCheckSumType,
this.systemConfig);*/
- DataFileServiceBean.UserStorageQuota quota = null;
+ UploadSessionQuotaLimit quota = null;
if (systemConfig.isStorageQuotasEnforced()) {
- quota = fileService.getUserStorageQuota(dvRequest.getAuthenticatedUser(), dataset);
+ quota = fileService.getUploadSessionQuotaLimit(dataset);
}
Command cmd = new CreateNewDataFilesCommand(dvRequest, workingVersion, newFileInputStream, newFileName, newFileContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType);
CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd);
diff --git a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java
index 29e821c28a4..147c2c004db 100644
--- a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java
@@ -71,6 +71,7 @@ public class VariableMetadata implements Serializable {
/**
* universe: metadata variable field.
*/
+ @Column(columnDefinition="TEXT")
private String universe;
/**
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java
index 55a375acb6c..f74c1222bb0 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java
@@ -39,6 +39,7 @@
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean;
import java.util.Stack;
@@ -126,6 +127,8 @@ public interface CommandContext {
public UserNotificationServiceBean notifications();
public AuthenticationServiceBean authentication();
+
+ public StorageUseServiceBean storageUse();
public SystemConfig systemConfig();
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
index 0470f59b861..3a21345448b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java
@@ -13,8 +13,8 @@
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper;
-import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota;
import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;
import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
@@ -74,7 +74,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand fileSizeLimit) {
try {
tempFile.toFile().delete();
@@ -213,11 +216,11 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
}
DataFile datafile = null;
- long fileSize = 0L;
+ long uncompressedFileSize = -1;
try {
uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile()));
File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit, storageQuotaLimit);
- fileSize = unZippedTempFile.length();
+ uncompressedFileSize = unZippedTempFile.length();
datafile = FileUtil.createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm());
} catch (IOException | FileExceedsMaxSizeException | FileExceedsStorageQuotaException ioex) {
// it looks like we simply skip the file silently, if its uncompressed size
@@ -248,7 +251,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
datafiles.add(datafile);
// Update quota if present
if (quota != null) {
- quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize);
+ quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + uncompressedFileSize);
}
return CreateDataFileResult.success(fileName, finalType, datafiles);
}
@@ -628,7 +631,35 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
throw new CommandExecutionException("Failed to process uploaded BagIt file", ioex, this);
}
}
+
+ // These are the final File and its size that will be used to
+ // create a single DataFile:
+
+ newFile = tempFile.toFile();
+ fileSize = newFile.length();
+
} else {
+ // Direct upload.
+
+ // Since this is a direct upload, there is no temp file associated
+ // with it, so we may or may not know the size of the file. If this is
+ // a direct upload via the UI, the page must have already looked up
+ // the size, after the client confirmed that the upload had completed
+ // (so that we can reject the upload here, i.e. before the user clicks
+ // Save, if it's over the size limit or storage quota). However, if
+ // this is a direct upload via the API, we will wait until the
+ // upload is finalized in the saveAndAddFiles method to enforce the
+ // limits.
+ if (newFileSize != null) {
+ fileSize = newFileSize;
+
+ // if the size is specified, and it's above the individual size
+ // limit for this store, we can reject it now:
+ if (fileSizeLimit != null && fileSize > fileSizeLimit) {
+ throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this);
+ }
+ }
+
// Default to suppliedContentType if set or the overall undetermined default if a contenttype isn't supplied
finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType;
String type = determineFileTypeByNameAndExtension(fileName);
@@ -639,34 +670,19 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
}
logger.fine("Supplied type: " + suppliedContentType + ", finalType: " + finalType);
}
+
+
}
+
// Finally, if none of the special cases above were applicable (or
// if we were unable to unpack an uploaded file, etc.), we'll just
// create and return a single DataFile:
- File newFile = null;
- long fileSize = -1;
- if (tempFile != null) {
- newFile = tempFile.toFile();
- fileSize = newFile.length();
- } else {
- // If this is a direct upload, and therefore no temp file associated
- // with it, the file size must be explicitly passed to the command
- // (note that direct upload relies on knowing the size of the file
- // that's being uploaded in advance).
- if (newFileSize != null) {
- fileSize = newFileSize;
- } else {
- // This is a direct upload via the API (DVUploader, etc.)
- //throw new CommandExecutionException("File size must be explicitly specified when creating DataFiles with Direct Upload", this);
- }
- }
// We have already checked that this file does not exceed the individual size limit;
// but if we are processing it as is, as a single file, we need to check if
// its size does not go beyond the allocated storage quota (if specified):
-
if (storageQuotaLimit != null && fileSize > storageQuotaLimit) {
if (newFile != null) {
// Remove the temp. file, if this is a non-direct upload.
@@ -685,7 +701,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
DataFile datafile = FileUtil.createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, newCheckSumType, newCheckSum);
- if (datafile != null && ((newFile != null) || (newStorageIdentifier != null))) {
+ if (datafile != null) {
if (warningMessage != null) {
createIngestFailureReport(datafile, warningMessage);
@@ -696,10 +712,19 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException
}
datafiles.add(datafile);
- // Update quota (may not be necessary in the context of direct upload - ?)
+ // Update the quota definition for the *current upload session*.
+ // This is relevant for uploads going through the UI page, where
+ // there may be an appreciable amount of time between the user
+ // uploading the files and clicking "Save". The file size should be
+ // available here for both direct and local uploads via the UI.
+ // It is not yet available if this is direct-via-API, but
+ // for API uploads the quota check will be enforced during the final
+ // save.
if (fileSize > 0 && quota != null) {
+ logger.info("Setting total usage in bytes to " + (quota.getTotalUsageInBytes() + fileSize));
quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize);
}
+
return CreateDataFileResult.success(fileName, finalType, datafiles);
}
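The quota bookkeeping in this hunk is per upload session rather than per file: the same UploadSessionQuotaLimit instance is handed to every CreateNewDataFilesCommand in the session, and each accepted file bumps its running total, so later files are checked against what has already been staged. A minimal sketch of that bookkeeping; getTotalAllocatedInBytes() is an assumed accessor named here only for illustration:

import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit;

final class UploadSessionQuotaSketch {
    private UploadSessionQuotaSketch() {}

    // Remaining bytes for this upload session, or null when quotas are not enforced.
    static Long remainingStorageQuota(UploadSessionQuotaLimit quota) {
        if (quota == null) {
            return null;
        }
        long remaining = quota.getTotalAllocatedInBytes() - quota.getTotalUsageInBytes();
        return Math.max(remaining, 0L);
    }

    // Mirrors what CreateNewDataFilesCommand does after each file it accepts:
    // bump the session total so the next file in the same session sees it.
    static void recordUploadedFile(UploadSessionQuotaLimit quota, long fileSize) {
        if (quota != null && fileSize > 0) {
            quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize);
        }
    }
}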
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java
new file mode 100644
index 00000000000..c0f863686da
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java
@@ -0,0 +1,53 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
+import edu.harvard.iq.dataverse.storageuse.StorageQuota;
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author landreev
+ *
+ * A superuser-only command:
+ */
+@RequiredPermissions({})
+public class DeleteCollectionQuotaCommand extends AbstractVoidCommand {
+
+ private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName());
+
+ private final Dataverse targetDataverse;
+
+ public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) {
+ super(aRequest, target);
+ targetDataverse = target;
+ }
+
+ @Override
+ public void executeImpl(CommandContext ctxt) throws CommandException {
+ // first check if user is a superuser
+ if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) {
+ throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"),
+ this, null, targetDataverse);
+ }
+
+ if (targetDataverse == null) {
+ throw new IllegalCommandException("", this);
+ }
+
+ StorageQuota storageQuota = targetDataverse.getStorageQuota();
+
+ if (storageQuota != null && storageQuota.getAllocation() != null) {
+ ctxt.dataverses().disableStorageQuota(storageQuota);
+ }
+ // ... and if no quota was enabled on the collection, there is nothing to do; treat it as success
+ }
+}
\ No newline at end of file
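Like the other storage-quota commands added in this change, DeleteCollectionQuotaCommand is meant to be submitted through the command engine (the companion Get command's javadoc points at the /api/dataverses/.../storage/quota API). A hedged sketch of how a caller might disable a collection quota; the helper class is illustrative only, and the superuser check is enforced by the command itself:

import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteCollectionQuotaCommand;

final class CollectionQuotaAdmin {
    private CollectionQuotaAdmin() {}

    // Disable (delete) the storage quota on a collection by submitting the new command.
    static void disableQuota(EjbDataverseEngine engine, DataverseRequest request, Dataverse collection)
            throws CommandException {
        engine.submit(new DeleteCollectionQuotaCommand(request, collection));
    }
}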
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java
index 83d0f877d61..e2730ec06d3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java
@@ -235,6 +235,20 @@ public String describe() {
@Override
public boolean onSuccess(CommandContext ctxt, Object r) {
+ // Adjust the storage use for the parent containers:
+ if (!doomed.isHarvested()) {
+ long storedSize = doomed.getFilesize();
+ // ingested tabular data files also have saved originals that
+ // are counted as "storage use"
+ Long savedOriginalSize = doomed.getOriginalFileSize();
+ if (savedOriginalSize != null) {
+ // Note that DataFile.getFilesize() can return -1 (for "unknown"):
+ storedSize = storedSize > 0 ? storedSize + savedOriginalSize : savedOriginalSize;
+ }
+ if (storedSize > 0) {
+ ctxt.storageUse().incrementStorageSizeRecursively(doomed.getOwner().getId(), (0L - storedSize));
+ }
+ }
/**
* We *could* re-index the entire dataset but it's more efficient to
* target individual files for deletion, which should always be drafts.
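The onSuccess() hook above subtracts the deleted file's full storage footprint from every ancestor container via storageUse().incrementStorageSizeRecursively() with a negative delta. The footprint is the archival size plus, for ingested tabular files, the saved original; a small sketch of that computation (the helper class is illustrative only):

import edu.harvard.iq.dataverse.DataFile;

final class StoredSizeSketch {
    private StoredSizeSketch() {}

    // Bytes a (non-harvested) DataFile occupies in storage: the archival file plus,
    // for ingested tabular files, the saved original. getFilesize() can be -1
    // ("unknown"), which is why the archival size is only counted when positive.
    static long storedSize(DataFile file) {
        long size = file.getFilesize();
        Long savedOriginalSize = file.getOriginalFileSize();
        if (savedOriginalSize != null) {
            size = size > 0 ? size + savedOriginalSize : savedOriginalSize;
        }
        return Math.max(size, 0L);
    }
}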
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
index 3da087addd9..89cfc732455 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java
@@ -32,15 +32,13 @@
import java.util.logging.Logger;
import edu.harvard.iq.dataverse.GlobalIdServiceBean;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
+import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.util.FileUtil;
import java.util.ArrayList;
import java.util.concurrent.Future;
import org.apache.solr.client.solrj.SolrServerException;
-import jakarta.ejb.EJB;
-import jakarta.inject.Inject;
-
/**
*
@@ -350,7 +348,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm
// (the decision was made to validate all the files on every
// major release; we can revisit the decision if there's any
// indication that this makes publishing take significantly longer.
- if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) {
+ String driverId = FileUtil.getStorageDriver(dataFile);
+ if (StorageIO.isDataverseAccessible(driverId) && (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize)) {
FileUtil.validateDataFileChecksum(dataFile);
}
else {
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java
new file mode 100644
index 00000000000..49f14e7c280
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java
@@ -0,0 +1,51 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author landreev
+ * The command doesn't do much. Its sole purpose is to check the permissions
+ * when it's called by the /api/dataverses/.../storage/quota API.
+ */
+// @RequiredPermissions - none defined, dynamic
+public class GetCollectionQuotaCommand extends AbstractCommand<Long> {
+
+ private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName());
+
+ private final Dataverse dataverse;
+
+ public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) {
+ super(aRequest, target);
+ dataverse = target;
+ }
+
+ @Override
+ public Long execute(CommandContext ctxt) throws CommandException {
+
+ if (dataverse != null && dataverse.getStorageQuota() != null) {
+ return dataverse.getStorageQuota().getAllocation();
+ }
+
+ return null;
+ }
+
+ @Override
+ public Map<String, Set<Permission>> getRequiredPermissions() {
+ return Collections.singletonMap("",
+ dataverse.isReleased() ? Collections.<Permission>emptySet()
+ : Collections.singleton(Permission.ViewUnpublishedDataverse));
+ }
+}
+
+
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java
new file mode 100644
index 00000000000..c30a5a34a81
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java
@@ -0,0 +1,45 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author landreev
+ */
+@RequiredPermissions(Permission.ManageDataversePermissions)
+// alternatively, we could make it dynamic - public for published collections
+// and Permission.ViewUnpublishedDataverse required otherwise (?)
+public class GetCollectionStorageUseCommand extends AbstractCommand<Long> {
+
+ private static final Logger logger = Logger.getLogger(GetCollectionStorageUseCommand.class.getCanonicalName());
+
+ private final Dataverse collection;
+
+ public GetCollectionStorageUseCommand(DataverseRequest aRequest, Dataverse target) {
+ super(aRequest, target);
+ collection = target;
+ }
+
+ @Override
+ public Long execute(CommandContext ctxt) throws CommandException {
+
+ if (collection == null) {
+ throw new CommandException("null collection passed to get storage use command", this);
+ }
+ return ctxt.storageUse().findStorageSizeByDvContainerId(collection.getId());
+ }
+
+ /*@Override
+ public Map> getRequiredPermissions() {
+ return Collections.singletonMap("",
+ dataverse.isReleased() ? Collections.emptySet()
+ : Collections.singleton(Permission.ViewUnpublishedDataverse));
+ }*/
+}
\ No newline at end of file
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
new file mode 100644
index 00000000000..2d5e1251614
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
@@ -0,0 +1,38 @@
+
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+
+
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author stephenkraffmiller
+ */
+@RequiredPermissions(Permission.AddDataset)
+public class GetDatasetSchemaCommand extends AbstractCommand<String> {
+
+ private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName());
+
+ private final Dataverse dataverse;
+
+ public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) {
+ super(aRequest, target);
+ dataverse = target;
+ }
+
+ @Override
+ public String execute(CommandContext ctxt) throws CommandException {
+ return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias());
+ }
+
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java
index 1454a4b1fdd..7bcc851bde2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java
@@ -25,15 +25,17 @@
public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand<DatasetVersion> {
private final Dataset ds;
private final boolean includeDeaccessioned;
+ private boolean checkPerms;
public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) {
- this(aRequest, anAffectedDataset, false);
+ this(aRequest, anAffectedDataset, false, false);
}
- public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) {
+ public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPerms) {
super(aRequest, anAffectedDataset);
ds = anAffectedDataset;
this.includeDeaccessioned = includeDeaccessioned;
+ this.checkPerms = checkPerms;
}
@Override
@@ -41,6 +43,6 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException {
if (ds.getLatestVersion().isDraft() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) {
return ctxt.engine().submit(new GetDraftDatasetVersionCommand(getRequest(), ds));
}
- return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned));
+ return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, checkPerms));
}
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java
index 4e4252fd155..0afcbe2d0bb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java
@@ -17,24 +17,49 @@
public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand<DatasetVersion> {
private final Dataset ds;
private final boolean includeDeaccessioned;
+ private final boolean checkPermsWhenDeaccessioned;
public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) {
- this(aRequest, anAffectedDataset, false);
+ this(aRequest, anAffectedDataset, false, false);
}
- public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) {
+ public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) {
super(aRequest, anAffectedDataset);
ds = anAffectedDataset;
this.includeDeaccessioned = includeDeaccessioned;
+ this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned;
}
+ /*
+ * Depending on the requested parameters, this command will return:
+ *
+ * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null.
+ * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version.
+ * If the user did not request to include a deaccessioned dataset, the command will return the latest published version.
+ *
+ */
@Override
public DatasetVersion execute(CommandContext ctxt) throws CommandException {
- for (DatasetVersion dsv : ds.getVersions()) {
- if (dsv.isReleased() || (includeDeaccessioned && dsv.isDeaccessioned() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset))) {
- return dsv;
+ DatasetVersion dsVersionResult = getReleaseOrDeaccessionedDatasetVersion();
+ if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) {
+ return dsVersionResult;
+ }
+ return null;
+ }
+
+ private DatasetVersion getReleaseOrDeaccessionedDatasetVersion() {
+ for (DatasetVersion dsVersion : ds.getVersions()) {
+ if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) {
+ return dsVersion;
}
}
return null;
}
+
+ private boolean userHasPermissionsOnDatasetVersion(DatasetVersion dsVersionResult, boolean checkPermsWhenDeaccessioned, CommandContext ctxt, Dataset ds) {
+ if (dsVersionResult.isDeaccessioned() && checkPermsWhenDeaccessioned) {
+ return ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset);
+ }
+ return true;
+ }
}
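The refactoring above separates "may a deaccessioned version be returned at all" (includeDeaccessioned) from "must the user also be allowed to see its files" (checkPermsWhenDeaccessioned, which adds the EditDataset check). A sketch of the two typical call shapes; the wrapper class and method names are illustrative only:

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;

final class LatestPublishedVersionExamples {
    private LatestPublishedVersionExamples() {}

    // Deaccessioned versions may be returned, metadata only: no extra permission check.
    static GetLatestPublishedDatasetVersionCommand metadataOnly(DataverseRequest req, Dataset ds) {
        return new GetLatestPublishedDatasetVersionCommand(req, ds, true, false);
    }

    // Deaccessioned versions may be returned with files: requires EditDataset, otherwise the command yields null.
    static GetLatestPublishedDatasetVersionCommand withFiles(DataverseRequest req, Dataset ds) {
        return new GetLatestPublishedDatasetVersionCommand(req, ds, true, true);
    }
}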
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java
index 2de2adff099..b98cd70a4da 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java
@@ -9,12 +9,12 @@
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
+
import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Logger;
-import jakarta.json.Json;
import jakarta.json.JsonObject;
-import jakarta.json.JsonReader;
@RequiredPermissions(Permission.EditDataset)
public class GetProvJsonCommand extends AbstractCommand {
@@ -35,13 +35,13 @@ public JsonObject execute(CommandContext ctxt) throws CommandException {
try {
StorageIO dataAccess = dataFile.getStorageIO();
- InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension);
- JsonObject jsonObject = null;
- if(null != inputStream) {
- JsonReader jsonReader = Json.createReader(inputStream);
- jsonObject = jsonReader.readObject();
+ try (InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension)) {
+ JsonObject jsonObject = null;
+ if (null != inputStream) {
+ jsonObject = JsonUtil.getJsonObject(inputStream);
+ }
+ return jsonObject;
}
- return jsonObject;
} catch (IOException ex) {
String error = "Exception caught in DataAccess.getStorageIO(dataFile) getting file. Error: " + ex;
throw new IllegalCommandException(error, this);
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java
index a87eb8a99a5..07256f057e2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java
@@ -25,23 +25,36 @@ public class GetSpecificPublishedDatasetVersionCommand extends AbstractCommand> {
private final DvObject definitionPoint;
@@ -34,5 +36,12 @@ public List execute(CommandContext ctxt) throws CommandException
}
return ctxt.permissions().assignmentsOn(definitionPoint);
}
+
+ @Override
+ public Map<String, Set<Permission>> getRequiredPermissions() {
+ return Collections.singletonMap("",
+ definitionPoint.isInstanceofDataset() ? Collections.singleton(Permission.ManageDatasetPermissions)
+ : Collections.singleton(Permission.ManageDataversePermissions));
+ }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java
new file mode 100644
index 00000000000..e52c47a5e7d
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCollectionQuotaCommand.java
@@ -0,0 +1,53 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author landreev
+ *
+ * A superuser-only command:
+ */
+@RequiredPermissions({})
+public class SetCollectionQuotaCommand extends AbstractVoidCommand {
+
+ private static final Logger logger = Logger.getLogger(SetCollectionQuotaCommand.class.getCanonicalName());
+
+ private final Dataverse dataverse;
+ private final Long allocation;
+
+ public SetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target, Long allocation) {
+ super(aRequest, target);
+ dataverse = target;
+ this.allocation = allocation;
+ }
+
+ @Override
+ public void executeImpl(CommandContext ctxt) throws CommandException {
+ // Check if user is a superuser:
+ if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) {
+ throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"),
+ this, null, dataverse);
+ }
+
+ if (dataverse == null) {
+ throw new IllegalCommandException("Must specify valid collection", this);
+ }
+
+ if (allocation == null) {
+ throw new IllegalCommandException("Must specify valid allocation in bytes", this);
+ }
+
+ ctxt.dataverses().saveStorageQuota(dataverse, allocation);
+ }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UningestFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UningestFileCommand.java
index f2b89746160..3e85630dd59 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UningestFileCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UningestFileCommand.java
@@ -105,6 +105,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
// all the attribute of the file that are stored in the database:
// the file size:
+ long archivalFileSize = uningest.getFilesize();
uningest.setFilesize(storedOriginalFileSize);
// original file format:
@@ -170,8 +171,20 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
logger.warning("Io Exception deleting all aux objects : " + uningest.getId());
}
+ // Finally, adjust the recorded storage use for the ancestral
+ // DvObjectContainers (the parent dataset + all the parent collections
+ // up to the root):
+ if (archivalFileSize > 0) {
+ ctxt.storageUse().incrementStorageSizeRecursively(uningest.getOwner().getId(), (0L - archivalFileSize));
+ }
+
}
+ @Override
+ public boolean onSuccess(CommandContext ctxt, Object r) {
+
+ return true;
+ }
private void resetIngestStats(DataFile uningest, CommandContext ctxt){
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
index 56c76f04c05..fe9415f39f9 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java
@@ -32,6 +32,8 @@ public class UpdateDataverseCommand extends AbstractCommand {
private final List facetList;
private final List featuredDataverseList;
private final List inputLevelList;
+
+ private boolean datasetsReindexRequired = false;
public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList,
DataverseRequest aRequest, List inputLevelList ) {
@@ -74,9 +76,13 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
}
}
- DataverseType oldDvType = ctxt.dataverses().find(editedDv.getId()).getDataverseType();
- String oldDvAlias = ctxt.dataverses().find(editedDv.getId()).getAlias();
- String oldDvName = ctxt.dataverses().find(editedDv.getId()).getName();
+ Dataverse oldDv = ctxt.dataverses().find(editedDv.getId());
+
+ DataverseType oldDvType = oldDv.getDataverseType();
+ String oldDvAlias = oldDv.getAlias();
+ String oldDvName = oldDv.getName();
+ oldDv = null;
+
Dataverse result = ctxt.dataverses().save(editedDv);
if ( facetList != null ) {
@@ -101,6 +107,14 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
}
}
+ // We don't want to reindex the child datasets unnecessarily:
+ // only when these values are changed do we need to reindex all child datasets.
+ // This check is not recursive, as all of these values report only the immediate parent.
+ if (!oldDvType.equals(editedDv.getDataverseType())
+ || !oldDvName.equals(editedDv.getName())
+ || !oldDvAlias.equals(editedDv.getAlias())) {
+ datasetsReindexRequired = true;
+ }
return result;
}
@@ -110,9 +124,16 @@ public boolean onSuccess(CommandContext ctxt, Object r) {
// first kick of async index of datasets
// TODO: is this actually needed? Is there a better way to handle
+ // It appears that at some point we lost some extra logic here, whereby
+ // we only reindex the underlying datasets if one or more of a specific set
+ // of fields have been changed (since these values are included in the
+ // indexed Solr documents for datasets). So I'm putting that back. -L.A.
Dataverse result = (Dataverse) r;
- List datasets = ctxt.datasets().findByOwnerId(result.getId());
- ctxt.index().asyncIndexDatasetList(datasets, true);
+
+ if (datasetsReindexRequired) {
+ List datasets = ctxt.datasets().findByOwnerId(result.getId());
+ ctxt.index().asyncIndexDatasetList(datasets, true);
+ }
return ctxt.dataverses().index((Dataverse) r);
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java
new file mode 100644
index 00000000000..619740ddd89
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java
@@ -0,0 +1,41 @@
+
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+
+
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author stephenkraffmiller
+ */
+@RequiredPermissions(Permission.AddDataset)
+public class ValidateDatasetJsonCommand extends AbstractCommand<String> {
+
+ private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName());
+
+ private final Dataverse dataverse;
+ private final String datasetJson;
+
+ public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, String datasetJsonIn) {
+ super(aRequest, target);
+ dataverse = target;
+ datasetJson = datasetJsonIn;
+ }
+
+ @Override
+ public String execute(CommandContext ctxt) throws CommandException {
+
+ return ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson);
+
+ }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
index 5119b4b96c7..edd01ae98a3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
@@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set excludedFieldSe
List vars = variableService.findByDataTableId(dt.getId());
if (checkField("catgry", excludedFieldSet, includedFieldSet)) {
if (checkIsWithoutFrequencies(vars)) {
+ // @todo: the method called here to calculate frequencies
+ // when they are missing from the database (for whatever
+ // reason) subsets the physical tab-delimited file and
+ // calculates them in real time, which is potentially a very
+ // expensive operation. Let's make sure that, when we do this, we
+ // save the resulting frequencies in the database, so that
+ // we don't have to do this again. Also, let's double-check
+ // whether the "checkIsWithoutFrequencies()" method is doing
+ // the right thing, as it appears to return true when there
+ // are no categorical variables in the DataTable (?)
calculateFrequencies(df, vars);
}
}
@@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List vars) {
private void calculateFrequencies(DataFile df, List vars)
{
+ // @todo: see the comment in the part of the code that calls this method
try {
DataConverter dc = new DataConverter();
File tabFile = dc.downloadFromStorageIO(df.getStorageIO());
diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
index cdde9fbe0e8..e7ae451cacf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
+++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java
@@ -22,12 +22,8 @@
import java.util.logging.Level;
import java.util.logging.Logger;
-import jakarta.json.Json;
-import jakarta.json.JsonArray;
-import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonNumber;
import jakarta.json.JsonObject;
-import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonString;
import jakarta.json.JsonValue;
import jakarta.ws.rs.HttpMethod;
@@ -43,15 +39,10 @@
*/
public class ExternalToolHandler extends URLTokenUtil {
- private final ExternalTool externalTool;
+ public final ExternalTool externalTool;
private String requestMethod;
-
- public static final String HTTP_METHOD="httpMethod";
- public static final String TIMEOUT="timeOut";
- public static final String SIGNED_URL="signedUrl";
- public static final String NAME="name";
- public static final String URL_TEMPLATE="urlTemplate";
+
/**
@@ -136,12 +127,12 @@ public String handleRequest(boolean preview) {
} else {
// ToDo - if the allowedApiCalls() are defined, could/should we send them to
- // tools using GET as well?
+ // tools using POST as well?
if (requestMethod.equals(HttpMethod.POST)) {
- String body = JsonUtil.prettyPrint(createPostBody(params).build());
+ String body = JsonUtil.prettyPrint(createPostBody(params, null).build());
try {
- logger.info("POST Body: " + body);
+ logger.fine("POST Body: " + body);
return postFormData(body);
} catch (IOException | InterruptedException ex) {
Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex);
@@ -151,60 +142,6 @@ public String handleRequest(boolean preview) {
return null;
}
- public JsonObject getParams(JsonObject toolParameters) {
- //ToDo - why an array of object each with a single key/value pair instead of one object?
- JsonArray queryParams = toolParameters.getJsonArray("queryParameters");
-
- // ToDo return json and print later
- JsonObjectBuilder paramsBuilder = Json.createObjectBuilder();
- if (!(queryParams == null) && !queryParams.isEmpty()) {
- queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> {
- queryParam.keySet().forEach((key) -> {
- String value = queryParam.getString(key);
- JsonValue param = getParam(value);
- if (param != null) {
- paramsBuilder.add(key, param);
- }
- });
- });
- }
- return paramsBuilder.build();
- }
-
- public JsonObjectBuilder createPostBody(JsonObject params) {
- JsonObjectBuilder bodyBuilder = Json.createObjectBuilder();
- bodyBuilder.add("queryParameters", params);
- String apiCallStr = externalTool.getAllowedApiCalls();
- if (apiCallStr != null && !apiCallStr.isBlank()) {
- JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls());
- JsonArrayBuilder apisBuilder = Json.createArrayBuilder();
- apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> {
- logger.fine(JsonUtil.prettyPrint(apiObj));
- String name = apiObj.getJsonString(NAME).getString();
- String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString();
- int timeout = apiObj.getInt(TIMEOUT);
- String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString();
- logger.fine("URL Template: " + urlTemplate);
- urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate;
- String apiPath = replaceTokensWithValues(urlTemplate);
- logger.fine("URL WithTokens: " + apiPath);
- String url = apiPath;
- // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users)
- ApiToken apiToken = getApiToken();
- if (apiToken != null) {
- url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(),
- httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("")
- + getApiToken().getTokenString());
- }
- logger.fine("Signed URL: " + url);
- apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod)
- .add(SIGNED_URL, url).add(TIMEOUT, timeout));
- }));
- bodyBuilder.add("signedUrls", apisBuilder);
- }
- return bodyBuilder;
- }
-
private String postFormData(String allowedApis) throws IOException, InterruptedException {
String url = null;
HttpClient client = HttpClient.newHttpClient();
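For context on what moves out of this class: the deleted createPostBody()/getParams() pair assembled the JSON body POSTed to an external tool, i.e. the resolved query parameters plus a "signedUrls" array when allowedApiCalls is configured. A sketch of that body shape with placeholder values (the helper class, parameter names, and URL below are illustrative only):

import jakarta.json.Json;
import jakarta.json.JsonObject;

final class ExternalToolPostBodySketch {
    private ExternalToolPostBodySketch() {}

    // Builds an example of the body shape: query parameters resolved from the tool's
    // manifest, plus one pre-signed callback URL per allowed API call.
    static JsonObject exampleBody() {
        return Json.createObjectBuilder()
                .add("queryParameters", Json.createObjectBuilder()
                        .add("datasetPid", "doi:10.5072/FK2/EXAMPLE"))
                .add("signedUrls", Json.createArrayBuilder()
                        .add(Json.createObjectBuilder()
                                .add("name", "retrieveDatasetJson")
                                .add("httpMethod", "GET")
                                .add("signedUrl", "https://dataverse.example.edu/api/v1/datasets/:persistentId/?persistentId=doi:10.5072/FK2/EXAMPLE&until=...&user=...&method=GET&token=...")
                                .add("timeOut", 270)))
                .build();
    }
}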
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java
index 877fc68e4a1..c93e2c6aa94 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java
@@ -46,7 +46,7 @@ String getRefreshToken() {
return refreshToken;
}
- ArrayList getOtherTokens() {
+ public ArrayList getOtherTokens() {
return otherTokens;
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java
new file mode 100644
index 00000000000..7e555935e2e
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java
@@ -0,0 +1,38 @@
+package edu.harvard.iq.dataverse.globus;
+
+public class GlobusEndpoint {
+
+ private String id;
+ private String clientToken;
+ private String basePath;
+
+ public GlobusEndpoint(String id, String clientToken, String basePath) {
+ this.id = id;
+ this.clientToken = clientToken;
+ this.basePath = basePath;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getClientToken() {
+ return clientToken;
+ }
+
+ public void setClientToken(String clientToken) {
+ this.clientToken = clientToken;
+ }
+
+ public String getBasePath() {
+ return basePath;
+ }
+
+ public void setBasePath(String basePath) {
+ this.basePath = basePath;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
index d6943ec3511..3e60441850b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java
@@ -1,9 +1,11 @@
package edu.harvard.iq.dataverse.globus;
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.Scheduler;
import com.google.gson.FieldNamingPolicy;
import com.google.gson.GsonBuilder;
import edu.harvard.iq.dataverse.*;
-
import jakarta.ejb.Asynchronous;
import jakarta.ejb.EJB;
import jakarta.ejb.Stateless;
@@ -15,8 +17,13 @@
import jakarta.json.JsonArray;
import jakarta.json.JsonArrayBuilder;
import jakarta.json.JsonObject;
+import jakarta.json.JsonObjectBuilder;
import jakarta.json.JsonPatch;
+import jakarta.json.JsonString;
+import jakarta.json.JsonValue.ValueType;
+import jakarta.json.stream.JsonParsingException;
import jakarta.servlet.http.HttpServletRequest;
+import jakarta.ws.rs.HttpMethod;
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json;
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray;
@@ -29,6 +36,8 @@
import java.net.URLEncoder;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
@@ -40,17 +49,26 @@
import java.util.stream.Collectors;
import java.util.stream.IntStream;
+import org.apache.commons.codec.binary.StringUtils;
+import org.primefaces.PrimeFaces;
+
import com.google.gson.Gson;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.users.ApiToken;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
+import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
+import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.util.URLTokenUtil;
+import edu.harvard.iq.dataverse.util.UrlSignerUtil;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
@Stateless
@@ -59,197 +77,313 @@ public class GlobusServiceBean implements java.io.Serializable {
@EJB
protected DatasetServiceBean datasetSvc;
-
@EJB
protected SettingsServiceBean settingsSvc;
-
@Inject
DataverseSession session;
-
@EJB
protected AuthenticationServiceBean authSvc;
-
@EJB
EjbDataverseEngine commandEngine;
-
@EJB
UserNotificationServiceBean userNotificationService;
+ @EJB
+ PrivateUrlServiceBean privateUrlService;
+ @EJB
+ FileDownloadServiceBean fileDownloadService;
+ @EJB
+ DataFileServiceBean dataFileService;
private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName());
private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss");
- private String code;
- private String userTransferToken;
- private String state;
-
- public String getState() {
- return state;
- }
-
- public void setState(String state) {
- this.state = state;
- }
-
- public String getCode() {
- return code;
- }
-
- public void setCode(String code) {
- this.code = code;
- }
-
- public String getUserTransferToken() {
- return userTransferToken;
- }
+ private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions)
+ throws MalformedURLException {
- public void setUserTransferToken(String userTransferToken) {
- this.userTransferToken = userTransferToken;
- }
+ String principalType = "identity";
-    ArrayList<String> checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint,
- String principalType, String principal) throws MalformedURLException {
- URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access_list");
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null);
-        ArrayList<String> ids = new ArrayList<String>();
+ URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list");
+ MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null);
if (result.status == 200) {
AccessList al = parseJson(result.jsonResponse, AccessList.class, false);
for (int i = 0; i < al.getDATA().size(); i++) {
Permissions pr = al.getDATA().get(i);
- if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory))
+
+ if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath()))
&& pr.getPrincipalType().equals(principalType)
- && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) {
- ids.add(pr.getId());
+ && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))
+ && pr.getPermissions().equals(permissions)) {
+ return pr.getId();
} else {
- logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType());
+ logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType());
continue;
}
}
}
-
- return ids;
+ return null;
}
- public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm)
- throws MalformedURLException {
- if (directory != null && !directory.equals("")) {
- directory = directory + "/";
- }
- logger.info("Start updating permissions." + " Directory is " + directory);
- String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, "");
- ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, null);
- logger.info("Size of rules " + rules.size());
- int count = 0;
- while (count < rules.size()) {
- logger.info("Start removing rules " + rules.get(count));
- Permissions permissions = new Permissions();
- permissions.setDATA_TYPE("access");
- permissions.setPermissions(perm);
- permissions.setPath(directory);
-
- Gson gson = new GsonBuilder().create();
- URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/"
- + rules.get(count));
- logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/"
- + rules.get(count));
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions));
- if (result.status != 200) {
- logger.warning("Cannot update access rule " + rules.get(count));
- } else {
- logger.info("Access rule " + rules.get(count) + " was updated");
- }
- count++;
- }
- }
-
- public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException {
-
+ /**
+ * Call to delete a globus rule related to the specified dataset.
+ *
+ * @param ruleId - Globus rule id - assumed to be associated with the
+ * dataset's file path (should not be called with a user
+ * specified rule id w/o further checking)
+ * @param datasetId - the id of the dataset associated with the rule
+ * @param globusLogger - a separate logger instance, may be null
+ */
+ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) {
+ globusLogger.fine("Start deleting rule " + ruleId + " for dataset " + dataset.getId());
if (ruleId.length() > 0) {
- AccessToken clientTokenUser = getClientToken();
- globusLogger.info("Start deleting permissions.");
- String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, "");
-
- URL url = new URL(
- "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId);
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null);
- if (result.status != 200) {
- globusLogger.warning("Cannot delete access rule " + ruleId);
- } else {
- globusLogger.info("Access rule " + ruleId + " was deleted successfully");
+ if (dataset != null) {
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ if (endpoint != null) {
+ String accessToken = endpoint.getClientToken();
+ globusLogger.info("Start deleting permissions.");
+ try {
+ URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId()
+ + "/access/" + ruleId);
+ MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null);
+ if (result.status != 200) {
+ globusLogger.warning("Cannot delete access rule " + ruleId);
+ } else {
+ globusLogger.info("Access rule " + ruleId + " was deleted successfully");
+ }
+ } catch (MalformedURLException ex) {
+ logger.log(Level.WARNING,
+ "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex);
+ }
+ }
}
}
-
}
- public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser,
- String directory, String globusEndpoint) throws MalformedURLException {
+ /**
+ * Request read/write access for the specified principal and generate a list of
+ * accessible paths for new files for the specified dataset.
+ *
+ * @param principal - the id of the Globus principal doing the transfer
+ * @param dataset
+ * @param numberOfPaths - how many files are to be transferred
+     * @return - a JsonObject with the Globus response status and, on success, a "paths" map of new storage identifiers to writable endpoint paths
+ */
+ public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) {
-        ArrayList<String> rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal);
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ String principalType = "identity";
Permissions permissions = new Permissions();
permissions.setDATA_TYPE("access");
permissions.setPrincipalType(principalType);
permissions.setPrincipal(principal);
- permissions.setPath(directory + "/");
- permissions.setPermissions(perm);
+ permissions.setPath(endpoint.getBasePath() + "/");
+ permissions.setPermissions("rw");
+
+ JsonObjectBuilder response = Json.createObjectBuilder();
+        // Try to create the directory: a 202 status means it was created, 502 means it already exists
+ int mkDirStatus = makeDirs(endpoint, dataset);
+        if (!(mkDirStatus == 202 || mkDirStatus == 502)) {
+ return response.add("status", mkDirStatus).build();
+ }
+ //The dir for the dataset's data exists, so try to request permission for the principal
+ int requestPermStatus = requestPermission(endpoint, dataset, permissions);
+ response.add("status", requestPermStatus);
+ if (requestPermStatus == 201) {
+ String driverId = dataset.getEffectiveStorageDriverId();
+ JsonObjectBuilder paths = Json.createObjectBuilder();
+ for (int i = 0; i < numberOfPaths; i++) {
+ String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId);
+ int lastIndex = Math.max(storageIdentifier.lastIndexOf("/"), storageIdentifier.lastIndexOf(":"));
+ paths.add(storageIdentifier, endpoint.getBasePath() + "/" + storageIdentifier.substring(lastIndex + 1));
+ }
+ response.add("paths", paths.build());
+ }
+ return response.build();
+ }
+
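
As a hedged illustration of what a caller gets back from requestAccessiblePaths: when Globus answers 201, the builder above produces a status plus a "paths" object mapping freshly minted storage identifiers to the endpoint paths the client should write to. The injected bean reference, the principal id, and the identifiers in this sketch are invented.

```java
// globusService is assumed to be an injected GlobusServiceBean; the identity id is made up.
JsonObject result = globusService.requestAccessiblePaths(
        "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed", dataset, 2);
if (result.getInt("status") == 201) {
    JsonObject paths = result.getJsonObject("paths");
    for (String storageIdentifier : paths.keySet()) {
        // e.g. globus://18e0a9c7b2f-4a1d9e2f -> <basePath>/18e0a9c7b2f-4a1d9e2f
        System.out.println(storageIdentifier + " -> " + paths.getString(storageIdentifier));
    }
}
```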
+ /**
+ * Call to create the directories for the specified dataset.
+ *
+ * @param dataset
+     * @return - the error status from the first subdirectory that fails to be created,
+     *         or the final success status if every directory was created or already existed
+ */
+ private int makeDirs(GlobusEndpoint endpoint, Dataset dataset) {
+ logger.fine("Creating dirs: " + endpoint.getBasePath());
+ int index = endpoint.getBasePath().lastIndexOf(dataset.getAuthorityForFileStorage())
+ + dataset.getAuthorityForFileStorage().length();
+ String nextDir = endpoint.getBasePath().substring(0, index);
+ int response = makeDir(endpoint, nextDir);
+ String identifier = dataset.getIdentifierForFileStorage();
+        // Identifiers usually have 0 or 1 slashes (e.g. FK2/ABCDEF), but the while loop handles any number of segments
+ //Will skip if the first makeDir above failed
+ while ((identifier.length() > 0) && ((response == 202 || response == 502))) {
+ index = identifier.indexOf('/');
+ if (index == -1) {
+ //Last dir to create
+ response = makeDir(endpoint, nextDir + "/" + identifier);
+ identifier = "";
+ } else {
+ //The next dir to create
+ nextDir = nextDir + "/" + identifier.substring(0, index);
+ response = makeDir(endpoint, nextDir);
+ //The rest of the identifier
+ identifier = identifier.substring(index + 1);
+ }
+ }
+ return response;
+ }
+
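
The directory walk above can be hard to follow in the abstract; this standalone sketch reproduces the same segment-by-segment decomposition for an invented base path and identifier, printing the directories that would be created (the Globus mkdir calls themselves are left out).

```java
import java.util.ArrayList;
import java.util.List;

public class MkdirPathSketch {
    public static void main(String[] args) {
        String basePath = "/dataverse/10.5072";   // path up to and including the authority (made up)
        String identifier = "FK2/ABCDEF";         // dataset identifier for file storage (made up)

        List<String> dirsToCreate = new ArrayList<>();
        String nextDir = basePath;
        dirsToCreate.add(nextDir);                 // the first makeDir call, before the loop
        while (identifier.length() > 0) {
            int index = identifier.indexOf('/');
            if (index == -1) {
                dirsToCreate.add(nextDir + "/" + identifier);   // last dir to create
                identifier = "";
            } else {
                nextDir = nextDir + "/" + identifier.substring(0, index);
                dirsToCreate.add(nextDir);
                identifier = identifier.substring(index + 1);
            }
        }
        // Prints /dataverse/10.5072, /dataverse/10.5072/FK2, /dataverse/10.5072/FK2/ABCDEF
        dirsToCreate.forEach(System.out::println);
    }
}
```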
+ private int makeDir(GlobusEndpoint endpoint, String dir) {
+ MakeRequestResponse result = null;
+ String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}";
+ try {
+ logger.fine(body);
+ URL url = new URL(
+ "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir");
+ result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body);
+
+ switch (result.status) {
+ case 202:
+ logger.fine("Dir " + dir + " was created successfully.");
+ break;
+ case 502:
+ logger.fine("Dir " + dir + " already exists.");
+ break;
+ default:
+ logger.warning("Status " + result.status + " received when creating dir " + dir);
+ logger.fine("Response: " + result.jsonResponse);
+ }
+ } catch (MalformedURLException ex) {
+ // Misconfiguration
+ logger.warning("Failed to create dir on " + endpoint.getId());
+ return 500;
+ }
+ return result.status;
+ }
+
+ private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissions permissions) {
Gson gson = new GsonBuilder().create();
MakeRequestResponse result = null;
- if (rules.size() == 0) {
- logger.info("Start creating the rule");
- URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access");
- result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST",
- gson.toJson(permissions));
+ logger.fine("Start creating the rule");
- if (result.status == 400) {
+ try {
+ URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access");
+ result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions));
+
+ switch (result.status) {
+ case 404:
+ logger.severe("Endpoint " + endpoint.getId() + " was not found");
+ break;
+ case 400:
logger.severe("Path " + permissions.getPath() + " is not valid");
- } else if (result.status == 409) {
+ break;
+ case 409:
logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules");
+ break;
+ case 201:
+ JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse);
+ if (globusResponse != null && globusResponse.containsKey("access_id")) {
+ permissions.setId(globusResponse.getString("access_id"));
+ monitorTemporaryPermissions(permissions.getId(), dataset.getId());
+ logger.fine("Access rule " + permissions.getId() + " was created successfully");
+ } else {
+ // Shouldn't happen!
+ logger.warning("Access rule id not returned for dataset " + dataset.getId());
+ }
}
-
return result.status;
- } else {
- logger.info("Start Updating the rule");
- URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/"
- + rules.get(0));
- result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT",
- gson.toJson(permissions));
-
- if (result.status == 400) {
- logger.severe("Path " + permissions.getPath() + " is not valid");
- } else if (result.status == 409) {
- logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules");
- }
- logger.info("Result status " + result.status);
+ } catch (MalformedURLException ex) {
+ // Misconfiguration
+ logger.warning("Failed to create access rule URL for " + endpoint.getId());
+ return 500;
}
-
- return result.status;
}
- public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException {
-
- URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId
- + "/successful_transfers");
-
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null);
+ /**
+ * Given an array of remote files to be referenced in the dataset, create a set
+ * of valid storage identifiers and return a map of the remote file paths to
+ * storage identifiers.
+ *
+ * @param dataset
+ * @param referencedFiles - a JSON array of remote files to be referenced in the
+ * dataset - each should be a string with the /path/to/file
+ * @return - a map of supplied paths to valid storage identifiers
+ */
+ public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) {
+ String driverId = dataset.getEffectiveStorageDriverId();
+ JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId);
+
+ JsonObjectBuilder fileMap = Json.createObjectBuilder();
+ referencedFiles.forEach(value -> {
+ if (value.getValueType() != ValueType.STRING) {
+ throw new JsonParsingException("ReferencedFiles must be strings", null);
+ }
+ String referencedFile = ((JsonString) value).getString();
+ boolean valid = false;
+ for (int i = 0; i < endpoints.size(); i++) {
+ if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) {
+ valid = true;
+ }
+ }
+ if (!valid) {
+ throw new IllegalArgumentException(
+ "Referenced file " + referencedFile + " is not in an allowed endpoint/path");
+ }
+ String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId);
+ fileMap.add(referencedFile, storageIdentifier + "//" + referencedFile);
+ });
+ return fileMap.build();
+ }
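
A hedged example of the mapping this method produces. The remote paths and resulting identifiers are invented; the allowed prefixes come from the store's reference endpoint configuration (GlobusAccessibleStore.getReferenceEndpointsWithPaths), and a path outside them raises IllegalArgumentException as above.

```java
JsonArray referencedFiles = Json.createArrayBuilder()
        .add("/remote-collection/climate/run1.nc")      // made-up remote paths
        .add("/remote-collection/climate/run2.nc")
        .build();
JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles);
// Each supplied path maps to "<new storage identifier>//<path>", e.g.
// /remote-collection/climate/run1.nc -> globus://18e0a9c7b2f-1f2c3d//remote-collection/climate/run1.nc
fileMap.forEach((path, id) ->
        System.out.println(path + " -> " + ((JsonString) id).getString()));
```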
- if (result.status == 200) {
- logger.info(" SUCCESS ====== ");
- return true;
- }
- return false;
+ /**
+ * A cache of temporary permission requests - for upload (rw) and download (r)
+ * access. When a temporary permission request is created, it is added to the
+ * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started,
+ * the permission will be revoked/deleted. (If a transfer has been started, the
+ * permission will not be revoked/deleted until the transfer is complete. This
+ * is handled in other methods.)
+ */
+ // ToDo - nominally this doesn't need to be as long as the allowed time for the
+ // downloadCache so there could be two separate settings.
+ // Single cache of open rules/permission requests
+    private final Cache<String, Long> rulesCache = Caffeine.newBuilder()
+ .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES))
+ .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> {
+ // Delete rules that expire
+ logger.fine("Rule " + ruleId + " expired");
+ Dataset dataset = datasetSvc.find(datasetId);
+ deletePermission((String) ruleId, dataset, logger);
+ })
+
+ .build();
+
+ // Convenience method to add a temporary permission request to the cache -
+ // allows logging of temporary permission requests
+ private void monitorTemporaryPermissions(String ruleId, long datasetId) {
+ logger.fine("Adding rule " + ruleId + " for dataset " + datasetId);
+ rulesCache.put(ruleId, datasetId);
}
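
Because the expiry behaviour of this cache carries real side effects (revoking Globus permissions), here is a self-contained sketch of the same Caffeine pattern with toy timings and a print statement standing in for deletePermission. Note that an explicit invalidate(), as used once a transfer has started, does not trigger the eviction listener.

```java
import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.Scheduler;
import java.time.Duration;

public class RulesCacheSketch {
    public static void main(String[] args) throws InterruptedException {
        Cache<String, Long> rulesCache = Caffeine.newBuilder()
                .expireAfterWrite(Duration.ofSeconds(2))            // stand-in for GLOBUS_CACHE_MAXAGE minutes
                .scheduler(Scheduler.systemScheduler())             // evict on time, not only on access
                .evictionListener((ruleId, datasetId, cause) ->
                        // stand-in for deletePermission(ruleId, dataset, logger)
                        System.out.println("Would revoke rule " + ruleId + " for dataset " + datasetId))
                .build();

        rulesCache.put("rule-123", 42L);
        rulesCache.invalidate("rule-123");   // explicit removal: listener is NOT called, rule survives

        rulesCache.put("rule-456", 42L);
        Thread.sleep(3000);
        rulesCache.cleanUp();                // expired entry is evicted: listener fires
    }
}
```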
- public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException {
+ /**
+ * Call the Globus API to get info about the transfer.
+ *
+     * @param accessToken  - the access token to use for the Globus transfer API call
+ * @param taskId - the Globus task id supplied by the user
+ * @param globusLogger - the transaction-specific logger to use (separate log
+ * files are created in general, some calls may use the
+ * class logger)
+     * @return - the GlobusTask describing the transfer, or null if the lookup does not return a 200 status
+ * @throws MalformedURLException
+ */
+ public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException {
URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId);
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null);
+ MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null);
GlobusTask task = null;
@@ -264,49 +398,34 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo
return task;
}
- public AccessToken getClientToken() throws MalformedURLException {
- String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "");
- URL url = new URL(
- "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials");
-
- MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null);
+ /**
+ * Globus call to get an access token for the user using the long-term token we
+ * hold.
+ *
+ * @param globusBasicToken - the base64 encoded Globus Basic token comprised of
+     *                          the <client id>:<client secret> pair
+ * @return - a valid Globus access token
+ */
+ public static AccessToken getClientToken(String globusBasicToken) {
+ URL url;
AccessToken clientTokenUser = null;
- if (result.status == 200) {
- clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true);
- }
- return clientTokenUser;
- }
-
- public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken)
- throws UnsupportedEncodingException, MalformedURLException {
- String serverName = origRequest.getServerName();
- if (serverName.equals("localhost")) {
- logger.severe("Changing localhost to utoronto");
- serverName = "utl-192-123.library.utoronto.ca";
- }
-
- String redirectURL = "https://" + serverName + "/globus.xhtml";
-
- redirectURL = URLEncoder.encode(redirectURL, "UTF-8");
- URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL
- + "&grant_type=authorization_code");
- logger.info(url.toString());
-
- MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null);
- AccessToken accessTokenUser = null;
+ try {
+ url = new URL(
+ "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials");
- if (result.status == 200) {
- logger.info("Access Token: \n" + result.toString());
- accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true);
- logger.info(accessTokenUser.getAccessToken());
+ MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null);
+ if (result.status == 200) {
+ clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true);
+ }
+ } catch (MalformedURLException e) {
+ // On a statically defined URL...
+ e.printStackTrace();
}
-
- return accessTokenUser;
-
+ return clientTokenUser;
}
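
The "Globus Basic token" this method expects is ordinary HTTP Basic credential material: the Globus client id and client secret joined by a colon and base64 encoded. A standalone sketch of producing it, with placeholder id and secret values:

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class GlobusBasicTokenSketch {
    public static void main(String[] args) {
        String clientId = "9ae5a4de-0000-4e10-0000-111111111111";   // placeholder Globus client id
        String clientSecret = "not-a-real-secret";                  // placeholder client secret
        String globusBasicToken = Base64.getEncoder().encodeToString(
                (clientId + ":" + clientSecret).getBytes(StandardCharsets.UTF_8));
        // This is the value a store would configure and pass to getClientToken(globusBasicToken).
        System.out.println("Authorization: Basic " + globusBasicToken);
    }
}
```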
- public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method,
+ private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method,
String jsonString) {
String str = null;
HttpURLConnection connection = null;
@@ -314,8 +433,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode
try {
connection = (HttpURLConnection) url.openConnection();
// Basic
- // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9
- logger.info(authType + " " + authCode);
+ logger.fine("For URL: " + url.toString());
connection.setRequestProperty("Authorization", authType + " " + authCode);
// connection.setRequestProperty("Content-Type",
// "application/x-www-form-urlencoded");
@@ -323,32 +441,30 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode
if (jsonString != null) {
connection.setRequestProperty("Content-Type", "application/json");
connection.setRequestProperty("Accept", "application/json");
- logger.info(jsonString);
+ logger.fine(jsonString);
connection.setDoOutput(true);
+
OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream());
wr.write(jsonString);
wr.flush();
}
status = connection.getResponseCode();
- logger.info("Status now " + status);
+ logger.fine("Status now " + status);
InputStream result = connection.getInputStream();
if (result != null) {
- logger.info("Result is not null");
str = readResultJson(result).toString();
- logger.info("str is ");
- logger.info(result.toString());
+ logger.fine("str is " + result.toString());
} else {
- logger.info("Result is null");
+ logger.fine("Result is null");
str = null;
}
- logger.info("status: " + status);
+ logger.fine("status: " + status);
} catch (IOException ex) {
- logger.info("IO");
logger.severe(ex.getMessage());
- logger.info(ex.getCause().toString());
- logger.info(ex.getStackTrace().toString());
+ logger.fine(ex.getCause().toString());
+ logger.fine(ex.getStackTrace().toString());
} finally {
if (connection != null) {
connection.disconnect();
@@ -359,18 +475,16 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode
}
- private StringBuilder readResultJson(InputStream in) {
+ private static StringBuilder readResultJson(InputStream in) {
StringBuilder sb = null;
- try {
-
- BufferedReader br = new BufferedReader(new InputStreamReader(in));
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) {
sb = new StringBuilder();
String line;
while ((line = br.readLine()) != null) {
sb.append(line + "\n");
}
br.close();
- logger.info(sb.toString());
+ logger.fine(sb.toString());
} catch (IOException e) {
sb = null;
logger.severe(e.getMessage());
@@ -378,7 +492,7 @@ private StringBuilder readResultJson(InputStream in) {
return sb;
}
-    private <T> T parseJson(String sb, Class<T> jsonParserClass, boolean namingPolicy) {
+    private static <T> T parseJson(String sb, Class<T> jsonParserClass, boolean namingPolicy) {
if (sb != null) {
Gson gson = null;
if (namingPolicy) {
@@ -395,32 +509,7 @@ private T parseJson(String sb, Class jsonParserClass, boolean namingPolic
}
}
- public String getDirectory(String datasetId) {
- Dataset dataset = null;
- String directory = null;
- try {
- dataset = datasetSvc.find(Long.parseLong(datasetId));
- if (dataset == null) {
- logger.severe("Dataset not found " + datasetId);
- return null;
- }
- String storeId = dataset.getStorageIdentifier();
- storeId.substring(storeId.indexOf("//") + 1);
- directory = storeId.substring(storeId.indexOf("//") + 1);
- logger.info(storeId);
- logger.info(directory);
- logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage());
- return directory;
-
- } catch (NumberFormatException nfe) {
- logger.severe(nfe.getMessage());
-
- return null;
- }
-
- }
-
- class MakeRequestResponse {
+ static class MakeRequestResponse {
public String jsonResponse;
public int status;
@@ -431,81 +520,61 @@ class MakeRequestResponse {
}
- private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint)
- throws MalformedURLException {
- URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path="
- + directory + "/");
-
- MakeRequestResponse result = makeRequest(url, "Bearer",
- clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null);
- logger.info("find directory status:" + result.status);
-
- return result;
+ /**
+ * Cache of open download Requests This cache keeps track of the set of files
+ * selected for transfer out (download) via Globus. It is a means of
+ * transferring the list from the DatasetPage, where it is generated via user UI
+ * actions, and the Datasets/globusDownloadParameters API.
+ *
+ * Nominally, the dataverse-globus app will call that API endpoint and then
+ * /requestGlobusDownload, at which point the cached info is sent to the app. If
+ * the app doesn't call within 5 minutes (the time allowed to call
+ * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period
+ * giving the user time to make choices in the app), the cached info is deleted.
+ *
+ */
+    private final Cache<String, JsonObject> downloadCache = Caffeine.newBuilder()
+ .expireAfterWrite(
+ Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES))
+ .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> {
+ // Delete downloads that expire
+ logger.fine("Download for " + downloadId + " expired");
+ })
+
+ .build();
+
+ public JsonObject getFilesForDownload(String downloadId) {
+ return downloadCache.getIfPresent(downloadId);
}
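
A hedged sketch of the handoff this cache enables; the file selection and ids are illustrative. getGlobusAppUrlForDataset (further down) stores the user's selection under a random downloadId, and the dataverse-globus app later retrieves it through the globusDownloadParameters API, which ends up calling getFilesForDownload:

```java
// Producer side (inside getGlobusAppUrlForDataset, download branch):
JsonObject files = GlobusUtil.getFilesMap(selectedDataFiles, dataset);   // selectedDataFiles is illustrative
String downloadId = UUID.randomUUID().toString();
downloadCache.put(downloadId, files);

// Consumer side (via /api/v1/datasets/{id}/globusDownloadParameters?downloadId=...):
JsonObject cached = getFilesForDownload(downloadId);   // null once the entry has aged out of the cache
```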
- public boolean giveGlobusPublicPermissions(String datasetId)
- throws UnsupportedEncodingException, MalformedURLException {
-
- String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, "");
- String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, "");
- if (globusEndpoint.equals("") || globusBasicToken.equals("")) {
- return false;
- }
- AccessToken clientTokenUser = getClientToken();
- if (clientTokenUser == null) {
- logger.severe("Cannot get client token ");
- return false;
- }
+ public int setPermissionForDownload(Dataset dataset, String principal) {
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ String principalType = "identity";
- String directory = getDirectory(datasetId);
- logger.info(directory);
-
- MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint);
-
- if (status.status == 200) {
-
- /*
- * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false);
- * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file:
- * files) { if (!file.getName().contains("cached") &&
- * !file.getName().contains(".thumb")) { int perStatus =
- * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory
- * + "/" + file.getName(), globusEndpoint); logger.info("givePermission status "
- * + perStatus + " for " + file.getName()); if (perStatus == 409) {
- * logger.info("Permissions already exist or limit was reached for " +
- * file.getName()); } else if (perStatus == 400) {
- * logger.info("No file in Globus " + file.getName()); } else if (perStatus !=
- * 201) { logger.info("Cannot get permission for " + file.getName()); } } } }
- */
-
- int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory,
- globusEndpoint);
- logger.info("givePermission status " + perStatus);
- if (perStatus == 409) {
- logger.info("Permissions already exist or limit was reached");
- } else if (perStatus == 400) {
- logger.info("No directory in Globus");
- } else if (perStatus != 201 && perStatus != 200) {
- logger.info("Cannot give read permission");
- return false;
- }
-
- } else if (status.status == 404) {
- logger.info("There is no globus directory");
- } else {
- logger.severe("Cannot find directory in globus, status " + status);
- return false;
- }
+ Permissions permissions = new Permissions();
+ permissions.setDATA_TYPE("access");
+ permissions.setPrincipalType(principalType);
+ permissions.setPrincipal(principal);
+ permissions.setPath(endpoint.getBasePath() + "/");
+ permissions.setPermissions("r");
- return true;
+ return requestPermission(endpoint, dataset, permissions);
}
- // Generates the URL to launch the Globus app
+ // Generates the URL to launch the Globus app for upload
public String getGlobusAppUrlForDataset(Dataset d) {
return getGlobusAppUrlForDataset(d, true, null);
}
- public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) {
+ /**
+     * Generates the App URL for upload (in) or download (out)
+ *
+ * @param d - the dataset involved
+ * @param upload - boolean, true for upload, false for download
+ * @param dataFiles - a list of the DataFiles to be downloaded
+     * @return - the URL at which to open the dataverse-globus app, including the locale and a signed callback parameter
+ */
+    public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List<DataFile> dataFiles) {
String localeCode = session.getLocaleCode();
ApiToken apiToken = null;
User user = session.getUser();
@@ -518,46 +587,53 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df)
apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user);
}
}
- String storePrefix = "";
String driverId = d.getEffectiveStorageDriverId();
try {
- storePrefix = DataAccess.getDriverPrefix(driverId);
} catch (Exception e) {
logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId);
}
- //Use URLTokenUtil for params currently in common with external tools.
- URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode);
- String appUrl;
+
+ // Use URLTokenUtil for params currently in common with external tools.
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, null, apiToken, localeCode);
+ String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost");
+ String callback = null;
if (upload) {
- appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost")
- + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}";
+ appUrl = appUrl + "/upload?dvLocale={localeCode}";
+ callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId()
+ + "/globusUploadParameters?locale=" + localeCode;
} else {
- if (df == null) {
- appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost")
- + "/download?datasetPid={datasetPid}&siteUrl={siteUrl}"
- + ((apiToken != null) ? "&apiToken={apiToken}" : "")
- + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}";
- } else {
- String rawStorageId = df.getStorageIdentifier();
- rawStorageId=rawStorageId.substring(rawStorageId.lastIndexOf(":")+1);
- appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost")
- + "/download-file?datasetPid={datasetPid}&siteUrl={siteUrl}"
- + ((apiToken != null) ? "&apiToken={apiToken}" : "")
- + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}&fileId={fileId}&storageIdentifier="
- + rawStorageId + "&fileName=" + df.getCurrentName();
- }
+ // Download
+ JsonObject files = GlobusUtil.getFilesMap(dataFiles, d);
+
+ String downloadId = UUID.randomUUID().toString();
+ downloadCache.put(downloadId, files);
+ appUrl = appUrl + "/download?dvLocale={localeCode}";
+ callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId()
+ + "/globusDownloadParameters?locale=" + localeCode + "&downloadId=" + downloadId;
+
+ }
+ if (apiToken != null) {
+ callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(),
+ HttpMethod.GET,
+ JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString());
+ } else {
+ // Shouldn't happen
+ logger.warning("Unable to get api token for user: " + user.getIdentifier());
}
- return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix;
+ appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback));
+
+ String finalUrl = tokenUtil.replaceTokensWithValues(appUrl);
+ logger.fine("Calling app: " + finalUrl);
+ return finalUrl;
}
- public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) {
- return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, null));
-
+    private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List<DataFile> downloadDFList) {
+ return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList));
}
-
+
@Asynchronous
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
- public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl,
+ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl,
AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException {
Integer countAll = 0;
@@ -585,41 +661,34 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
globusLogger = logger;
}
- globusLogger.info("Starting an globusUpload ");
-
- String datasetIdentifier = dataset.getStorageIdentifier();
-
- // ToDo - use DataAccess methods?
- String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3);
- datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3);
-
- Thread.sleep(5000);
-
- JsonObject jsonObject = null;
- try {
- jsonObject = JsonUtil.getJsonObject(jsonData);
- } catch (Exception jpe) {
- jpe.printStackTrace();
- logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", jsonData);
- // TODO: I think an (parsing) exception should stop the process, shouldn't it?
- }
- logger.log(Level.INFO, "json: {0}", JsonUtil.prettyPrint(jsonObject));
+ logger.fine("json: " + JsonUtil.prettyPrint(jsonData));
- String taskIdentifier = jsonObject.getString("taskIdentifier");
+ String taskIdentifier = jsonData.getString("taskIdentifier");
- String ruleId = "";
- try {
- ruleId = jsonObject.getString("ruleId");
- } catch (NullPointerException npe) {
- logger.warning("NPE for jsonData object");
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger);
+ String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw");
+ logger.fine("Found rule: " + ruleId);
+ if (ruleId != null) {
+ Long datasetId = rulesCache.getIfPresent(ruleId);
+ if (datasetId != null) {
+                // Invalidating the cache entry here does not fire the eviction listener, so the rule itself is kept
+ rulesCache.invalidate(ruleId);
+ }
}
+ // Wait before first check
+ Thread.sleep(5000);
// globus task status check
- GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger);
+ task = globusStatusCheck(endpoint, taskIdentifier, globusLogger);
String taskStatus = getTaskStatus(task);
- if (ruleId.length() > 0) {
- deletePermision(ruleId, globusLogger);
+ globusLogger.info("Starting a globusUpload ");
+
+ if (ruleId != null) {
+ // Transfer is complete, so delete rule
+ deletePermission(ruleId, dataset, globusLogger);
+
}
// If success, switch to an EditInProgress lock - do this before removing the
@@ -661,21 +730,30 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
//
         List<String> inputList = new ArrayList<String>();
- JsonArray filesJsonArray = jsonObject.getJsonArray("files");
+ JsonArray filesJsonArray = jsonData.getJsonArray("files");
if (filesJsonArray != null) {
+ String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/"
+ + dataset.getIdentifierForFileStorage();
for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) {
// storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from
// externalTool
String storageIdentifier = fileJsonObject.getString("storageIdentifier");
- String[] bits = storageIdentifier.split(":");
- String bucketName = bits[1].replace("/", "");
+ String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier);
+ String storeId = parts[0];
+ // If this is an S3 store, we need to split out the bucket name
+ String[] bits = parts[1].split(":");
+ String bucketName = "";
+ if (bits.length > 1) {
+ bucketName = bits[0];
+ }
String fileId = bits[bits.length - 1];
// fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873
- String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId;
+ // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873
+ String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId;
String fileName = fileJsonObject.getString("fileName");
inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName);
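
The storage-identifier handling above now works for both S3-backed and Globus-backed stores. This standalone sketch reproduces the same split for the two example identifiers mentioned in the comments, using plain string operations as a stand-in for DataAccess.getDriverIdAndStorageLocation:

```java
public class StorageIdentifierSketch {
    public static void main(String[] args) {
        String datasetIdentifier = "10.5072/FK2/3S6G2E";               // authority/identifier (example)
        String[] examples = {
                "s3://gcs5-bucket1:1781cfeb8a7-748c270a227c",          // S3 store: bucket before the colon
                "globus://1781cfeb8a7-4ad9418a5873" };                 // Globus store: no bucket

        for (String storageIdentifier : examples) {
            // Stand-in for DataAccess.getDriverIdAndStorageLocation(storageIdentifier)
            String storeId = storageIdentifier.substring(0, storageIdentifier.indexOf("://"));
            String location = storageIdentifier.substring(storageIdentifier.indexOf("://") + 3);

            String[] bits = location.split(":");
            String bucketName = (bits.length > 1) ? bits[0] : "";
            String fileId = bits[bits.length - 1];

            // s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-748c270a227c
            // globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873
            System.out.println(storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId);
        }
    }
}
```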
@@ -684,7 +762,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
// calculateMissingMetadataFields: checksum, mimetype
JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger);
JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files");
-
+ logger.fine("Size: " + newfilesJsonArray.size());
+ logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0)));
JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder();
for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) {
@@ -692,29 +771,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
countAll++;
String storageIdentifier = fileJsonObject.getString("storageIdentifier");
String fileName = fileJsonObject.getString("fileName");
- String directoryLabel = fileJsonObject.getString("directoryLabel");
- String[] bits = storageIdentifier.split(":");
+ String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier);
+ // If this is an S3 store, we need to split out the bucket name
+ String[] bits = parts[1].split(":");
+ if (bits.length > 1) {
+ }
String fileId = bits[bits.length - 1];
                List<JsonObject> newfileJsonObject = IntStream.range(0, newfilesJsonArray.size())
.mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId))
.filter(Objects::nonNull).collect(Collectors.toList());
-
if (newfileJsonObject != null) {
- if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) {
- JsonPatch path = Json.createPatchBuilder()
- .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build();
- fileJsonObject = path.apply(fileJsonObject);
- path = Json.createPatchBuilder()
- .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build();
- fileJsonObject = path.apply(fileJsonObject);
- jsonDataSecondAPI.add(fileJsonObject);
- countSuccess++;
- } else {
- globusLogger.info(fileName
- + " will be skipped from adding to dataset by second API due to missing values ");
- countError++;
- }
+ logger.fine("List Size: " + newfileJsonObject.size());
+ // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) {
+ JsonPatch path = Json.createPatchBuilder()
+ .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build();
+ fileJsonObject = path.apply(fileJsonObject);
+ path = Json.createPatchBuilder()
+ .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build();
+ fileJsonObject = path.apply(fileJsonObject);
+ jsonDataSecondAPI.add(fileJsonObject);
+ countSuccess++;
+ // } else {
+ // globusLogger.info(fileName
+ // + " will be skipped from adding to dataset by second API due to missing
+ // values ");
+ // countError++;
+ // }
} else {
globusLogger.info(fileName
+ " will be skipped from adding to dataset by second API due to missing values ");
@@ -731,6 +814,9 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
+ datasetIdentifier + " -F jsonData='" + newjsonData + "'";
System.out.println("*******====command ==== " + command);
+ // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of
+ // calling API
+
String output = addFilesAsync(command, globusLogger);
if (output.equalsIgnoreCase("ok")) {
// if(!taskSkippedFiles)
@@ -757,10 +843,6 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
globusLogger.info("Files failures: " + countError.toString());
globusLogger.info("Finished upload via Globus job.");
- if (fileHandlerSuceeded) {
- fileHandler.close();
- }
-
} catch (Exception e) {
logger.info("Exception from globusUpload call ");
e.printStackTrace();
@@ -768,6 +850,13 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin
datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress);
}
}
+ if (ruleId != null) {
+ deletePermission(ruleId, dataset, globusLogger);
+ globusLogger.info("Removed upload permission: " + ruleId);
+ }
+ if (fileHandlerSuceeded) {
+ fileHandler.close();
+ }
}
public String addFilesAsync(String curlCommand, Logger globusLogger)
@@ -809,18 +898,16 @@ private String addFiles(String curlCommand, Logger globusLogger) {
sb.append(line);
globusLogger.info(" API Output : " + sb.toString());
JsonObject jsonObject = null;
- try {
- jsonObject = JsonUtil.getJsonObject(sb.toString());
- } catch (Exception jpe) {
- jpe.printStackTrace();
- globusLogger.log(Level.SEVERE, "Error parsing dataset json.");
- // TODO: a parsing exception should cause the process to stop.
- }
+ jsonObject = JsonUtil.getJsonObject(sb.toString());
status = jsonObject.getString("status");
} catch (Exception ex) {
- globusLogger.log(Level.SEVERE,
- "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex);
+ if (ex instanceof JsonParsingException) {
+ globusLogger.log(Level.SEVERE, "Error parsing dataset json.");
+ } else {
+ globusLogger.log(Level.SEVERE,
+ "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex);
+ }
}
return status;
@@ -852,7 +939,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro
globusLogger = logger;
}
- globusLogger.info("Starting an globusDownload ");
+ globusLogger.info("Starting a globusDownload ");
JsonObject jsonObject = null;
try {
@@ -864,20 +951,35 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro
}
String taskIdentifier = jsonObject.getString("taskIdentifier");
- String ruleId = "";
- try {
- jsonObject.getString("ruleId");
- } catch (NullPointerException npe) {
-
- }
+ GlobusEndpoint endpoint = getGlobusEndpoint(dataset);
+ logger.fine("Endpoint path: " + endpoint.getBasePath());
+ // If the rules_cache times out, the permission will be deleted. Presumably that
+ // doesn't affect a
// globus task status check
- GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger);
+ GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger);
+ String ruleId = getRuleId(endpoint, task.getOwner_id(), "r");
+ if (ruleId != null) {
+ logger.fine("Found rule: " + ruleId);
+ Long datasetId = rulesCache.getIfPresent(ruleId);
+ if (datasetId != null) {
+ logger.fine("Deleting from cache: rule: " + ruleId);
+                // Invalidating the cache entry here does not fire the eviction listener, so the rule itself is kept
+ rulesCache.invalidate(ruleId);
+ }
+ } else {
+ // Something is wrong - the rule should be there (a race with the cache timing
+ // out?)
+ logger.warning("ruleId not found for taskId: " + taskIdentifier);
+ }
+ task = globusStatusCheck(endpoint, taskIdentifier, globusLogger);
String taskStatus = getTaskStatus(task);
- if (ruleId.length() > 0) {
- deletePermision(ruleId, globusLogger);
+ // Transfer is done (success or failure) so delete the rule
+ if (ruleId != null) {
+ logger.fine("Deleting: rule: " + ruleId);
+ deletePermission(ruleId, dataset, globusLogger);
}
if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) {
@@ -902,18 +1004,18 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro
Executor executor = Executors.newFixedThreadPool(10);
- private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException {
+ private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger)
+ throws MalformedURLException {
boolean taskCompletion = false;
String status = "";
GlobusTask task = null;
- int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50);
+ int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(
+ settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50);
do {
try {
globusLogger.info("checking globus transfer task " + taskId);
Thread.sleep(pollingInterval * 1000);
- AccessToken clientTokenUser = getClientToken();
- // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId);
- task = getTask(clientTokenUser, taskId, globusLogger);
+ task = getTask(endpoint.getClientToken(), taskId, globusLogger);
if (task != null) {
status = task.getStatus();
if (status != null) {
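
A minimal, self-contained version of the polling loop above, with the Globus task lookup stubbed out and a short interval instead of the configured :GlobusPollingInterval (default 50 seconds):

```java
public class PollingSketch {
    // Stand-in for getTask(...).getStatus(): stays ACTIVE for a few polls, then succeeds.
    static String fetchStatus(int attempt) {
        return attempt < 3 ? "ACTIVE" : "SUCCEEDED";
    }

    public static void main(String[] args) throws InterruptedException {
        int pollingIntervalSeconds = 1;   // toy value; the bean reads :GlobusPollingInterval
        int attempt = 0;
        String status;
        do {
            Thread.sleep(pollingIntervalSeconds * 1000L);
            status = fetchStatus(attempt++);
            System.out.println("checking globus transfer task, status=" + status);
        } while ("ACTIVE".equals(status));
    }
}
```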
@@ -956,7 +1058,7 @@ private String getTaskStatus(GlobusTask task) {
if (task != null) {
status = task.getStatus();
if (status != null) {
- // The task is in progress.
+ // The task is in progress but is not ok or queued
if (status.equalsIgnoreCase("ACTIVE")) {
status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description();
} else {
@@ -986,7 +1088,7 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger
.collect(Collectors.toList());
});
- CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> {
+ CompletableFuture> completableFuture = allCompletableFuture.thenApply(files -> {
return files.stream().map(d -> json(d)).collect(toJsonArray());
});
@@ -999,7 +1101,6 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger
}
    private CompletableFuture<FileDetailsHolder> calculateDetailsAsync(String id, Logger globusLogger) {
- // logger.info(" calcualte additional details for these globus id ==== " + id);
return CompletableFuture.supplyAsync(() -> {
try {
@@ -1027,7 +1128,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger)
String fullPath = id.split("IDsplit")[1];
String fileName = id.split("IDsplit")[2];
- // ToDo: what if the file doesnot exists in s3
+ // ToDo: what if the file does not exist in s3
// ToDo: what if checksum calculation failed
do {
@@ -1038,9 +1139,9 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger)
count = 3;
} catch (IOException ioex) {
count = 3;
- logger.info(ioex.getMessage());
- globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath
- + ") does not appear to be an S3 object associated with driver: ");
+ logger.fine(ioex.getMessage());
+ globusLogger.info(
+ "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: ");
} catch (Exception ex) {
count = count + 1;
ex.printStackTrace();
@@ -1051,7 +1152,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger)
} while (count < 3);
if (checksumVal.length() == 0) {
- checksumVal = "NULL";
+ checksumVal = "Not available in Dataverse";
}
String mimeType = calculatemime(fileName);
@@ -1067,7 +1168,7 @@ public String calculatemime(String fileName) throws InterruptedException {
String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT;
String type = FileUtil.determineFileTypeByNameAndExtension(fileName);
- if (type!=null && !type.isBlank()) {
+ if (type != null && !type.isBlank()) {
if (FileUtil.useRecognizedType(finalType, type)) {
finalType = type;
}
@@ -1075,194 +1176,106 @@ public String calculatemime(String fileName) throws InterruptedException {
return finalType;
}
- /*
- * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user)
- * throws MalformedURLException {
- *
- * logger.info("=====Tasklist == dataset id :" + dataset.getId()); String
- * directory = null;
- *
- * try {
- *
- * List fileMetadatas = new ArrayList<>();
- *
- * StorageIO datasetSIO = DataAccess.getStorageIO(dataset);
- *
- *
- *
- * DatasetVersion workingVersion = dataset.getEditVersion();
- *
- * if (workingVersion.getCreateTime() != null) {
- * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); }
- *
- * directory = dataset.getAuthorityForFileStorage() + "/" +
- * dataset.getIdentifierForFileStorage();
- *
- * System.out.println("======= directory ==== " + directory +
- * " ==== datasetId :" + dataset.getId()); Map checksumMapOld
- * = new HashMap<>();
- *
- * Iterator fmIt = workingVersion.getFileMetadatas().iterator();
- *
- * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile()
- * != null && fm.getDataFile().getId() != null) { String chksum =
- * fm.getDataFile().getChecksumValue(); if (chksum != null) {
- * checksumMapOld.put(chksum, 1); } } }
- *
- * List dFileList = new ArrayList<>(); boolean update = false; for
- * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) {
- *
- * String s3ObjectKey = s3ObjectSummary.getKey();
- *
- *
- * String t = s3ObjectKey.replace(directory, "");
- *
- * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String
- * filePath = s3ObjectKey; String fileName =
- * filePath.split("/")[filePath.split("/").length - 1]; String fullPath =
- * datasetSIO.getStorageLocation() + "/" + fileName;
- *
- * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO =
- * DataAccess.getDirectStorageIO(fullPath); InputStream in =
- * dataFileStorageIO.getInputStream();
- *
- * String checksumVal = FileUtil.calculateChecksum(in,
- * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag();
- * logger.info("The checksum is " + checksumVal); if
- * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" +
- * dataset.getId() + "======= filename ==== " + filePath +
- * " == file already exists "); } else if (filePath.contains("cached") ||
- * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else
- * { update = true; logger.info("datasetId :" + dataset.getId() +
- * "======= filename ==== " + filePath + " == new file "); try {
- *
- * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE);
- * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new
- * Date().getTime())); datafile.setCreateDate(new Timestamp(new
- * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new
- * Date().getTime()));
- *
- * FileMetadata fmd = new FileMetadata();
- *
- *
- * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory,
- * "").replace(File.separator + fileName, ""));
- *
- * fmd.setDataFile(datafile);
- *
- * datafile.getFileMetadatas().add(fmd);
- *
- * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile);
- * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== "
- * + filePath + " == added to datafile, filemetadata ");
- *
- * try { // We persist "SHA1" rather than "SHA-1".
- * //datafile.setChecksumType(DataFile.ChecksumType.SHA1);
- * datafile.setChecksumType(DataFile.ChecksumType.MD5);
- * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) {
- * logger.info("==== datasetId :" + dataset.getId() +
- * "======Could not calculate checksumType signature for the new file "); }
- *
- * datafile.setFilesize(totalSize);
- *
- * dFileList.add(datafile);
- *
- * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() +
- * "======Failed to process and/or save the file " + ioex.getMessage()); return
- * false;
- *
- * } } } } if (update) {
- *
- * List filesAdded = new ArrayList<>();
- *
- * if (dFileList != null && dFileList.size() > 0) {
- *
- * // Dataset dataset = version.getDataset();
- *
- * for (DataFile dataFile : dFileList) {
- *
- * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset);
- *
- * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata());
- * dataFile.getFileMetadata().setDatasetVersion(workingVersion);
- * dataset.getFiles().add(dataFile);
- *
- * }
- *
- * filesAdded.add(dataFile);
- *
- * }
- *
- * logger.info("==== datasetId :" + dataset.getId() +
- * " ===== Done! Finished saving new files to the dataset."); }
- *
- * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) {
- * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null;
- *
- * if (workingVersion.isDraft()) {
- *
- * logger.info("Async: ==== datasetId :" + dataset.getId() +
- * " ==== inside draft version ");
- *
- * Timestamp updateTime = new Timestamp(new Date().getTime());
- *
- * workingVersion.setLastUpdateTime(updateTime);
- * dataset.setModificationTime(updateTime);
- *
- *
- * for (FileMetadata fileMetadata : fileMetadatas) {
- *
- * if (fileMetadata.getDataFile().getCreateDate() == null) {
- * fileMetadata.getDataFile().setCreateDate(updateTime);
- * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); }
- * fileMetadata.getDataFile().setModificationTime(updateTime); }
- *
- *
- * } else { logger.info("datasetId :" + dataset.getId() +
- * " ==== inside released version ");
- *
- * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for
- * (FileMetadata fileMetadata : fileMetadatas) { if
- * (fileMetadata.getDataFile().getStorageIdentifier() != null) {
- *
- * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion.
- * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) {
- * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } }
- *
- *
- * }
- *
- *
- * try { Command