Skip to content

Commit

Permalink
add S3 tests, LocalStack, MinIO #6783
Browse files Browse the repository at this point in the history
Developers can now test S3 locally by using the Dockerized
development environment, which now includes both LocalStack
and MinIO. See S3AccessIT which executes API (end to end) tests.

In addition, a new integration test class
(not an API test, the new kind launched with `mvn verify`)
has been added at S3AccessIOLocalstackIT. It uses Testcontainers
to spin up Localstack for S3 testing and does not require
Dataverse to be running.

Note that the format of docker-compose-dev.yml had to change to
allow for JVM options to be added.

Finally, docs were improved for listing and setting stores via API.
  • Loading branch information
pdurbin committed Nov 20, 2023
1 parent 9186b06 commit d30ecfd
Show file tree
Hide file tree
Showing 7 changed files with 436 additions and 38 deletions.
3 changes: 3 additions & 0 deletions conf/localstack/buckets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Creates the S3 bucket used by the development environment inside LocalStack.
# Background: https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack
bucket_name="mybucket"
awslocal s3 mb "s3://${bucket_name}"
3 changes: 3 additions & 0 deletions doc/release-notes/6783-s3-tests.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. See S3AccessIT which executes API (end to end) tests.

In addition, a new integration test class (not an API test, the new kind launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up LocalStack for S3 testing and does not require Dataverse to be running.
4 changes: 4 additions & 0 deletions doc/sphinx-guides/source/admin/dataverses-datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,15 @@ Configure a Dataverse Collection to Store All New Files in a Specific File Store
To direct new files (uploaded when datasets are created or edited) for all datasets in a given Dataverse collection, the store can be specified via the API as shown below, or by editing the 'General Information' for a Dataverse collection on the Dataverse collection page. Only accessible to superusers. ::
curl -H "X-Dataverse-key: $API_TOKEN" -X PUT -d $storageDriverLabel http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver

(Note that for ``dataverse.files.store1.label=MyLabel``, you should pass ``MyLabel``.)

The current driver can be seen using::

curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver

(Note that for ``dataverse.files.store1.label=MyLabel``, ``store1`` will be returned.)

The storage driver can be reset to the default store with::

curl -H "X-Dataverse-key: $API_TOKEN" -X DELETE http://$SERVER/api/admin/dataverse/$dataverse-alias/storageDriver
Expand Down
78 changes: 68 additions & 10 deletions docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,38 @@ services:
restart: on-failure
user: payara
environment:
- DATAVERSE_DB_HOST=postgres
- DATAVERSE_DB_PASSWORD=secret
- DATAVERSE_DB_USER=${DATAVERSE_DB_USER}
- ENABLE_JDWP=1
- DATAVERSE_FEATURE_API_BEARER_AUTH=1
- DATAVERSE_AUTH_OIDC_ENABLED=1
- DATAVERSE_AUTH_OIDC_CLIENT_ID=test
- DATAVERSE_AUTH_OIDC_CLIENT_SECRET=94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8
- DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL=http://keycloak.mydomain.com:8090/realms/test
- DATAVERSE_JSF_REFRESH_PERIOD=1
DATAVERSE_DB_HOST: postgres
DATAVERSE_DB_PASSWORD: secret
DATAVERSE_DB_USER: ${DATAVERSE_DB_USER}
ENABLE_JDWP: "1"
DATAVERSE_FEATURE_API_BEARER_AUTH: "1"
DATAVERSE_AUTH_OIDC_ENABLED: "1"
DATAVERSE_AUTH_OIDC_CLIENT_ID: test
DATAVERSE_AUTH_OIDC_CLIENT_SECRET: 94XHrfNRwXsjqTqApRrwWmhDLDHpIYV8
DATAVERSE_AUTH_OIDC_AUTH_SERVER_URL: http://keycloak.mydomain.com:8090/realms/test
DATAVERSE_JSF_REFRESH_PERIOD: "1"
JVM_ARGS: -Ddataverse.files.storage-driver-id=file1
-Ddataverse.files.file1.type=file
-Ddataverse.files.file1.label=Filesystem
-Ddataverse.files.file1.directory=${STORAGE_DIR}/store
-Ddataverse.files.localstack1.type=s3
-Ddataverse.files.localstack1.label=LocalStack
-Ddataverse.files.localstack1.custom-endpoint-url=http://localstack:4566
-Ddataverse.files.localstack1.custom-endpoint-region=us-east-2
-Ddataverse.files.localstack1.bucket-name=mybucket
-Ddataverse.files.localstack1.path-style-access=true
-Ddataverse.files.localstack1.upload-redirect=false
-Ddataverse.files.localstack1.access-key=default
-Ddataverse.files.localstack1.secret-key=default
-Ddataverse.files.minio1.type=s3
-Ddataverse.files.minio1.label=MinIO
-Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000
-Ddataverse.files.minio1.custom-endpoint-region=us-east-1
-Ddataverse.files.minio1.bucket-name=mybucket
-Ddataverse.files.minio1.path-style-access=true
-Ddataverse.files.minio1.upload-redirect=false
-Ddataverse.files.minio1.access-key=minioadmin
-Ddataverse.files.minio1.secret-key=minioadmin
ports:
- "8080:8080" # HTTP (Dataverse Application)
- "4848:4848" # HTTP (Payara Admin Console)
Expand Down Expand Up @@ -156,6 +178,42 @@ services:
networks:
- dataverse

# LocalStack container; reachable as "localstack" on the dataverse network
# and on the host via loopback port 4566.
dev_localstack:
  container_name: "dev_localstack"
  hostname: "localstack"
  image: localstack/localstack:2.3.2
  restart: on-failure
  ports:
    - "127.0.0.1:4566:4566"
  environment:
    # Passed through from the host shell; empty when DEBUG is unset.
    DEBUG: "${DEBUG-}"
    DOCKER_HOST: "unix:///var/run/docker.sock"
    HOSTNAME_EXTERNAL: "localstack"
  networks:
    - dataverse
  volumes:
    # Init scripts (e.g. conf/localstack/buckets.sh) are mounted into
    # LocalStack's ready.d init directory.
    - ./conf/localstack:/etc/localstack/init/ready.d
  tmpfs:
    - /localstack:mode=770,size=128M,uid=1000,gid=1000

# MinIO container; reachable as "minio" on the dataverse network.
# NOTE(review): the image tag is unpinned (unlike dev_localstack), so the
# MinIO version can change between pulls -- consider pinning a release tag.
dev_minio:
  container_name: "dev_minio"
  hostname: "minio"
  image: minio/minio
  restart: on-failure
  ports:
    # Published on loopback only, matching dev_localstack: the root
    # credentials below are the well-known defaults.
    - "127.0.0.1:9000:9000"  # S3 API
    - "127.0.0.1:9001:9001"  # web console
  networks:
    - dataverse
  volumes:
    - minio_storage:/data
  environment:
    # these are the defaults but are here for clarity
    MINIO_ROOT_USER: minioadmin
    MINIO_ROOT_PASSWORD: minioadmin
  # Fix the console port explicitly: without --console-address MinIO
  # chooses a random console port and the published 9001 mapping is dead.
  command: ["server", "/data", "--console-address", ":9001"]

networks:
dataverse:
driver: bridge
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,11 @@
<version>3.0.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>localstack</artifactId>
<scope>test</scope>
</dependency>
<!--
Brute force solution until we are on Jakarta EE 10.
Otherwise, we get very cryptic errors about missing bundle files on test runs.
Expand Down
228 changes: 200 additions & 28 deletions src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java
Original file line number Diff line number Diff line change
@@ -1,66 +1,238 @@
package edu.harvard.iq.dataverse.api;

import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.Bucket;
import com.amazonaws.services.s3.model.HeadBucketRequest;
import com.amazonaws.services.s3.model.HeadBucketResult;
import io.restassured.RestAssured;
import static io.restassured.RestAssured.given;
import io.restassured.path.json.JsonPath;
import io.restassured.response.Response;
import java.util.List;
import java.util.logging.Logger;
import static org.hamcrest.CoreMatchers.equalTo;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import static org.hamcrest.Matchers.startsWith;
import org.junit.jupiter.api.Assertions;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;

/**
* NOTE: This test WILL NOT pass if your installation is not configured for Amazon S3 storage.
* For S3 storage, you must set two jvm options: storage-driver-id and s3-bucket-name
* Refer to the guides or to https://github.com/IQSS/dataverse/issues/3921#issuecomment-319973245
* @author bsilverstein
* This test requires services spun up in Docker.
*/
public class S3AccessIT {

private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName());

/** S3-compatible backends this test can be pointed at. */
public enum TypesOfS3 {
    MINIO, LOCALSTACK
}

// Credentials handed to the S3 client for whichever backend is selected in setUp().
// "minioadmin" matches MINIO_ROOT_USER / MINIO_ROOT_PASSWORD and the
// dataverse.files.minio1 access/secret keys in docker-compose-dev.yml.
static final String accessKey = "minioadmin";
static final String secretKey = "minioadmin";
// Bucket shared by the localstack1 and minio1 stores (bucket-name=mybucket).
static final String bucketName = "mybucket";
// Store id and label of the backend under test; assigned in setUp().
static String driverId;
static String driverLabel;
// S3 client used to inspect the bucket directly; built in setUp().
static AmazonS3 s3 = null;

/**
 * Points REST Assured at the Dataverse instance under test, builds an S3
 * client for the selected backend and makes sure the test bucket exists.
 */
@BeforeAll
public static void setUp() {
    RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();

    // Backend selection: LocalStack is assigned first and then overridden,
    // so MinIO is what actually runs. Remove the second assignment to
    // exercise LocalStack instead.
    TypesOfS3 typeToTest = TypesOfS3.LOCALSTACK;
    typeToTest = TypesOfS3.MINIO;

    // The same static key pair is used for both backends.
    AWSStaticCredentialsProvider credentials
            = new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey));

    if (typeToTest == TypesOfS3.LOCALSTACK) {
        driverId = "localstack1";
        driverLabel = "LocalStack";
        EndpointConfiguration localstackEndpoint
                = new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName());
        s3 = AmazonS3ClientBuilder.standard()
                .withCredentials(credentials)
                .withEndpointConfiguration(localstackEndpoint)
                .build();
    } else if (typeToTest == TypesOfS3.MINIO) {
        driverId = "minio1";
        driverLabel = "MinIO";
        EndpointConfiguration minioEndpoint
                = new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName());
        s3 = AmazonS3ClientBuilder.standard()
                // Path-style access is needed for a local endpoint, see
                // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local
                .withPathStyleAccessEnabled(Boolean.TRUE)
                .withCredentials(credentials)
                .withEndpointConfiguration(minioEndpoint)
                .build();
    }

    System.out.println("buckets before attempting to create " + bucketName);
    s3.listBuckets().forEach(existing -> System.out.println("bucket: " + existing));

    // Create the bucket if it doesn't exist.
    // Note that the LocalStack bucket is created by conf/localstack/buckets.sh
    // because we haven't figured out how to create it properly in Java.
    // Perhaps it is missing ACLs.
    try {
        s3.headBucket(new HeadBucketRequest(bucketName));
    } catch (AmazonS3Exception bucketLookupFailure) {
        // Any S3 error from the HEAD request is treated as "bucket missing".
        s3.createBucket(bucketName);
    }
}

/**
 * End-to-end check of the S3 store selected in setUp(): points a new
 * collection at the store, uploads a file through the native API, reads the
 * object back directly from the bucket, deletes the file through the API and
 * verifies that the object is gone from the bucket (404).
 *
 * NOTE(review): as shown here the body declares datasetId twice and has a
 * ".statusCode(200)" chain following an already terminated statement; this
 * looks like removed and added diff lines interleaved -- confirm against the
 * actual file before relying on this listing.
 */
@Test
public void testAddDataFileS3Prefix() {
// A superuser is needed for the /api/admin storage driver endpoints.
Response createSuperuser = UtilIT.createRandomUser();
String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser);
String superusername = UtilIT.getUsernameFromResponse(createSuperuser);
UtilIT.makeSuperUser(superusername);
Response storageDrivers = listStorageDrivers(superuserApiToken);
storageDrivers.prettyPrint();
// TODO where is "Local/local" coming from?
// NOTE(review): this expected listing is built but not compared against
// the storageDrivers response anywhere in the code shown.
String drivers = """
{
"status": "OK",
"data": {
"LocalStack": "localstack1",
"MinIO": "minio1",
"Local": "local",
"Filesystem": "file1"
}
}""";

//create user who will make a dataverse/dataset
Response createUser = UtilIT.createRandomUser();
String username = UtilIT.getUsernameFromResponse(createUser);
String apiToken = UtilIT.getApiTokenFromResponse(createUser);

Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
createDataverseResponse.prettyPrint();
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);


// A freshly created collection is expected to report "undefined" as its driver.
Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken);
originalStorageDriver.prettyPrint();
originalStorageDriver.then().assertThat()
.body("data.message", equalTo("undefined"))
.statusCode(200);

// The driver is set by passing the store's label (driverLabel), not its id.
Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken);
setStorageDriverToS3.prettyPrint();
setStorageDriverToS3.then().assertThat()
.statusCode(200);

Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken);
updatedStorageDriver.prettyPrint();
updatedStorageDriver.then().assertThat()
.statusCode(200);

Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
createDatasetResponse.prettyPrint();

//upload a tabular file via native, check storage id prefix for s3
createDatasetResponse.then().assertThat().statusCode(201);
Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
String datasetPid = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId");
// Drops the first four characters of the PID; presumably the protocol
// prefix such as "doi:" -- TODO confirm.
String datasetStorageIdentifier = datasetPid.substring(4);

Response getDatasetMetadata = UtilIT.nativeGet(datasetId, apiToken);
getDatasetMetadata.prettyPrint();
getDatasetMetadata.then().assertThat().statusCode(200);

//upload a tabular file via native, check storage id prefix for driverId
String pathToFile = "scripts/search/data/tabular/1char";
Response addFileResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
addFileResponse.prettyPrint();
addFileResponse.then().assertThat()
.body("data.files[0].dataFile.storageIdentifier", startsWith("s3://"));

//clean up test dvobjects and user
Response deleteDataset = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
deleteDataset.prettyPrint();
deleteDataset.then().assertThat()
.statusCode(200);
.statusCode(200)
.body("data.files[0].dataFile.storageIdentifier", startsWith(driverId + "://"));

Response deleteDataverse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
deleteDataverse.prettyPrint();
deleteDataverse.then().assertThat()
.statusCode(200);

Response deleteUser = UtilIT.deleteUser(username);
deleteUser.prettyPrint();
deleteUser.then().assertThat()
.statusCode(200);
String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id");

Response getfileMetadata = UtilIT.getFileData(fileId, apiToken);
getfileMetadata.prettyPrint();
getfileMetadata.then().assertThat().statusCode(200);

// The storage identifier has the form <driverId>://<bucketName>:<key>, so
// splitting on ":" leaves the key as the third element.
String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier");
String keyInDataverse = storageIdentifier.split(":")[2];
Assertions.assertEquals(driverId + "://" + bucketName + ":" + keyInDataverse, storageIdentifier);

for (Bucket bucket : s3.listBuckets()) {
System.out.println("bucket: " + bucket);
}

// List<S3ObjectSummary> summaries = s3.listObjects(bucketName).getObjectSummaries();
// for (S3ObjectSummary summary : summaries) {
// System.out.println("summary: " + summary);
// /**
// * summary: S3ObjectSummary{bucketName='mybucket',
// * key='10.5072/FK2/6MGSJD/18b631645ef-4c6a6c2d49f8',
// * eTag='60b725f10c9c85c70d97880dfe8191b3', size=2, lastModified=Tue
// * Oct 24 19:08:06 UTC 2023, storageClass='STANDARD', owner=S3Owner
// * [name=webfile,id=75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a]}
// */
// }
// The object key in the bucket is <dataset storage identifier>/<file key>.
String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse;
String s3Object = s3.getObjectAsString(bucketName, keyInS3);
System.out.println("s3Object: " + s3Object);

// The file uploaded above only contains the character "a".
assertEquals("a".trim(), s3Object.trim());

Response deleteFile = UtilIT.deleteFileApi(Integer.parseInt(fileId), apiToken);
deleteFile.prettyPrint();
deleteFile.then().assertThat().statusCode(200);

// Fetching the object again must now fail; capture the exception to inspect its status.
AmazonS3Exception expectedException = null;
try {
s3.getObjectAsString(bucketName, keyInS3);
} catch (AmazonS3Exception ex) {
expectedException = ex;
}
assertNotNull(expectedException);
// 404 because the file has been successfully deleted
assertEquals(404, expectedException.getStatusCode());

}

//TODO: move these into UtilIT. They are here for now to avoid merge conflicts
/**
 * Requests the list of configured storage drivers from the admin API.
 *
 * @param apiToken API token sent in the Dataverse API token header
 * @return the raw response of GET /api/admin/dataverse/storageDrivers
 */
static Response listStorageDrivers(String apiToken) {
    String endpoint = "/api/admin/dataverse/storageDrivers";
    return given().header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken).get(endpoint);
}

/**
 * Requests the storage driver currently assigned to a collection.
 *
 * @param dvAlias alias of the Dataverse collection
 * @param apiToken API token sent in the Dataverse API token header
 * @return the raw response of GET /api/admin/dataverse/{alias}/storageDriver
 */
static Response getStorageDriver(String dvAlias, String apiToken) {
    String endpoint = "/api/admin/dataverse/" + dvAlias + "/storageDriver";
    return given().header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken).get(endpoint);
}

/**
 * Assigns a storage driver to a collection by sending the store's label as
 * the request body.
 *
 * @param dvAlias alias of the Dataverse collection
 * @param label label of the store to assign (sent verbatim as the body)
 * @param apiToken API token sent in the Dataverse API token header
 * @return the raw response of PUT /api/admin/dataverse/{alias}/storageDriver
 */
static Response setStorageDriver(String dvAlias, String label, String apiToken) {
    String endpoint = "/api/admin/dataverse/" + dvAlias + "/storageDriver";
    return given()
            .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken)
            .body(label)
            .put(endpoint);
}

}
Loading

0 comments on commit d30ecfd

Please sign in to comment.