Skip to content

Commit

Permalink
refactor and add direct upload test #6783
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Nov 1, 2023
1 parent 7f578b7 commit b624206
Show file tree
Hide file tree
Showing 2 changed files with 245 additions and 78 deletions.
2 changes: 1 addition & 1 deletion docker-compose-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ services:
-Ddataverse.files.localstack1.custom-endpoint-region=us-east-2
-Ddataverse.files.localstack1.bucket-name=mybucket
-Ddataverse.files.localstack1.path-style-access=true
-Ddataverse.files.localstack1.upload-redirect=false
-Ddataverse.files.localstack1.upload-redirect=true
-Ddataverse.files.localstack1.access-key=default
-Ddataverse.files.localstack1.secret-key=default
-Ddataverse.files.minio1.type=s3
Expand Down
321 changes: 244 additions & 77 deletions src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,73 +9,65 @@
import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.Bucket;
import com.amazonaws.services.s3.model.HeadBucketRequest;
import com.amazonaws.services.s3.model.HeadBucketResult;
import io.restassured.RestAssured;
import static io.restassured.RestAssured.given;
import io.restassured.path.json.JsonPath;
import io.restassured.response.Response;
import java.util.List;
import io.restassured.specification.RequestSpecification;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.logging.Logger;
import org.apache.commons.lang3.math.NumberUtils;
import static org.hamcrest.CoreMatchers.equalTo;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import static org.hamcrest.Matchers.startsWith;
import org.junit.jupiter.api.Assertions;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/**
* This test requires services spun up in Docker.
* This test requires LocalStack and Minio to be running. Developers can use our
* docker-compose file, which has all the necessary configuration.
*/
public class S3AccessIT {

private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName());

public enum TypesOfS3 {
MINIO,
LOCALSTACK
};

static final String accessKey = "minioadmin";
static final String secretKey = "minioadmin";
static final String bucketName = "mybucket";
static String driverId;
static String driverLabel;
static AmazonS3 s3 = null;
static final String BUCKET_NAME = "mybucket";
static AmazonS3 s3localstack = null;
static AmazonS3 s3minio = null;

@BeforeAll
public static void setUp() {
RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();

TypesOfS3 typeToTest = TypesOfS3.LOCALSTACK;
typeToTest = TypesOfS3.MINIO;

switch (typeToTest) {
case LOCALSTACK -> {
driverId = "localstack1";
driverLabel = "LocalStack";
s3 = AmazonS3ClientBuilder.standard()
.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)))
.withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build();
}
case MINIO -> {
driverId = "minio1";
driverLabel = "MinIO";
s3 = AmazonS3ClientBuilder.standard()
// https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local
.withPathStyleAccessEnabled(Boolean.TRUE)
.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)))
.withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build();
// String location = s3.getBucketLocation(bucketName);
// if (location != "US") {
// Bucket bucket = s3.createBucket(bucketName);
// }
}
// At least in when spun up by our docker-compose file, the creds don't matter for LocalStack.
String accessKeyLocalStack = "whatever";
String secretKeyLocalStack = "not used";

s3localstack = AmazonS3ClientBuilder.standard()
.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyLocalStack, secretKeyLocalStack)))
.withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build();

String accessKeyMinio = "minioadmin";
String secretKeyMinio = "minioadmin";
s3minio = AmazonS3ClientBuilder.standard()
// https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local
.withPathStyleAccessEnabled(Boolean.TRUE)
.withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyMinio, secretKeyMinio)))
.withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build();

System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME);
for (Bucket bucket : s3localstack.listBuckets()) {
System.out.println("bucket: " + bucket);
}
System.out.println("buckets before attempting to create " + bucketName);
for (Bucket bucket : s3.listBuckets()) {

System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME);
for (Bucket bucket : s3minio.listBuckets()) {
System.out.println("bucket: " + bucket);
}

Expand All @@ -84,27 +76,27 @@ public static void setUp() {
// because we haven't figured out how to create it properly in Java.
// Perhaps it is missing ACLs.
try {
s3.headBucket(new HeadBucketRequest(bucketName));
s3localstack.headBucket(new HeadBucketRequest(BUCKET_NAME));
} catch (AmazonS3Exception ex) {
s3.createBucket(bucketName);
s3localstack.createBucket(BUCKET_NAME);
}

try {
s3minio.headBucket(new HeadBucketRequest(BUCKET_NAME));
} catch (AmazonS3Exception ex) {
s3minio.createBucket(BUCKET_NAME);
}

// String location = s3.getBucketLocation(bucketName);
//// HeadBucketRequest headBucketRequest;
// s3.headBucket(headBucketRequest);
// if (location != null && !"US".equals(location)) {
// System.out.println("Creating bucket. Location was " + location);
// Bucket createdBucket = s3.createBucket(bucketName);
// System.out.println("created bucket: " + createdBucket);
// }
// System.out.println("buckets after creating " + bucketName);
// for (Bucket bucket : s3.listBuckets()) {
// System.out.println("bucket: " + bucket);
// }
}

/**
* We're using MinIO for testing non-direct upload.
*/
@Test
public void testAddDataFileS3Prefix() {
public void testNonDirectUpload() {
String driverId = "minio1";
String driverLabel = "MinIO";

Response createSuperuser = UtilIT.createRandomUser();
String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser);
String superusername = UtilIT.getUsernameFromResponse(createSuperuser);
Expand Down Expand Up @@ -175,25 +167,10 @@ public void testAddDataFileS3Prefix() {

String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier");
String keyInDataverse = storageIdentifier.split(":")[2];
Assertions.assertEquals(driverId + "://" + bucketName + ":" + keyInDataverse, storageIdentifier);
Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier);

for (Bucket bucket : s3.listBuckets()) {
System.out.println("bucket: " + bucket);
}

// List<S3ObjectSummary> summaries = s3.listObjects(bucketName).getObjectSummaries();
// for (S3ObjectSummary summary : summaries) {
// System.out.println("summary: " + summary);
// /**
// * summary: S3ObjectSummary{bucketName='mybucket',
// * key='10.5072/FK2/6MGSJD/18b631645ef-4c6a6c2d49f8',
// * eTag='60b725f10c9c85c70d97880dfe8191b3', size=2, lastModified=Tue
// * Oct 24 19:08:06 UTC 2023, storageClass='STANDARD', owner=S3Owner
// * [name=webfile,id=75aa57f09aa0c8caeab4f8c24e99d10f8e7faeebf76c078efc7c6caea54ba06a]}
// */
// }
String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse;
String s3Object = s3.getObjectAsString(bucketName, keyInS3);
String s3Object = s3minio.getObjectAsString(BUCKET_NAME, keyInS3);
System.out.println("s3Object: " + s3Object);

// The file uploaded above only contains the character "a".
Expand All @@ -205,7 +182,175 @@ public void testAddDataFileS3Prefix() {

AmazonS3Exception expectedException = null;
try {
s3.getObjectAsString(bucketName, keyInS3);
s3minio.getObjectAsString(BUCKET_NAME, keyInS3);
} catch (AmazonS3Exception ex) {
expectedException = ex;
}
assertNotNull(expectedException);
// 404 because the file has been sucessfully deleted
assertEquals(404, expectedException.getStatusCode());

}

/**
* We use LocalStack to test direct upload.
*/
@Test
public void testDirectUpload() {
String driverId = "localstack1";
String driverLabel = "LocalStack";
Response createSuperuser = UtilIT.createRandomUser();
String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser);
String superusername = UtilIT.getUsernameFromResponse(createSuperuser);
UtilIT.makeSuperUser(superusername);
Response storageDrivers = listStorageDrivers(superuserApiToken);
storageDrivers.prettyPrint();
// TODO where is "Local/local" coming from?
String drivers = """
{
"status": "OK",
"data": {
"LocalStack": "localstack1",
"MinIO": "minio1",
"Local": "local",
"Filesystem": "file1"
}
}""";

//create user who will make a dataverse/dataset
Response createUser = UtilIT.createRandomUser();
String username = UtilIT.getUsernameFromResponse(createUser);
String apiToken = UtilIT.getApiTokenFromResponse(createUser);

Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
createDataverseResponse.prettyPrint();
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

Response originalStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken);
originalStorageDriver.prettyPrint();
originalStorageDriver.then().assertThat()
.body("data.message", equalTo("undefined"))
.statusCode(200);

Response setStorageDriverToS3 = setStorageDriver(dataverseAlias, driverLabel, superuserApiToken);
setStorageDriverToS3.prettyPrint();
setStorageDriverToS3.then().assertThat()
.statusCode(200);

Response updatedStorageDriver = getStorageDriver(dataverseAlias, superuserApiToken);
updatedStorageDriver.prettyPrint();
updatedStorageDriver.then().assertThat()
.statusCode(200);

Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
createDatasetResponse.prettyPrint();
createDatasetResponse.then().assertThat().statusCode(201);
Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
String datasetPid = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId");
String datasetStorageIdentifier = datasetPid.substring(4);

Response getDatasetMetadata = UtilIT.nativeGet(datasetId, apiToken);
getDatasetMetadata.prettyPrint();
getDatasetMetadata.then().assertThat().statusCode(200);

// //upload a tabular file via native, check storage id prefix for driverId
// String pathToFile = "scripts/search/data/tabular/1char";
// Response addFileResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
// addFileResponse.prettyPrint();
// addFileResponse.then().assertThat()
// .statusCode(200)
// .body("data.files[0].dataFile.storageIdentifier", startsWith(driverId + "://"));
//
// String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id");
long size = 1000000000l;
Response getUploadUrls = getUploadUrls(datasetPid, size, apiToken);
getUploadUrls.prettyPrint();
getUploadUrls.then().assertThat().statusCode(200);

String url = JsonPath.from(getUploadUrls.asString()).getString("data.url");
String partSize = JsonPath.from(getUploadUrls.asString()).getString("data.partSize");
String storageIdentifier = JsonPath.from(getUploadUrls.asString()).getString("data.storageIdentifier");
System.out.println("url: " + url);
System.out.println("partSize: " + partSize);
System.out.println("storageIdentifier: " + storageIdentifier);

System.out.println("uploading file via direct upload");
String decodedUrl = null;
try {
decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8.name());
} catch (UnsupportedEncodingException ex) {
}

// change to localhost because LocalStack is running in a container locally
String localhostUrl = decodedUrl.replace("http://localstack", "http://localhost");
String contentsOfFile = "foobar";

InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8));
Response uploadFileDirect = uploadFileDirect(localhostUrl, inputStream);
uploadFileDirect.prettyPrint();
/*
Direct upload to MinIO is failing with errors like this:
<Error>
<Code>SignatureDoesNotMatch</Code>
<Message>The request signature we calculated does not match the signature you provided. Check your key and signing method.</Message>
<Key>10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5</Key>
<BucketName>mybucket</BucketName>
<Resource>/mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5</Resource>
<RequestId>1793915CCC5BC95C</RequestId>
<HostId>dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8</HostId>
</Error>
*/
uploadFileDirect.then().assertThat().statusCode(200);

// TODO: Use MD5 or whatever Dataverse is configured for and
// actually calculate it.
String jsonData = """
{
"description": "My description.",
"directoryLabel": "data/subdir1",
"categories": [
"Data"
],
"restrict": "false",
"storageIdentifier": "%s",
"fileName": "file1.txt",
"mimeType": "text/plain",
"checksum": {
"@type": "SHA-1",
"@value": "123456"
}
}
""".formatted(storageIdentifier);

// "There was an error when trying to add the new file. File size must be explicitly specified when creating DataFiles with Direct Upload"
Response addRemoteFile = UtilIT.addRemoteFile(datasetId.toString(), jsonData, apiToken);
addRemoteFile.prettyPrint();
addRemoteFile.then().assertThat()
.statusCode(200);

String fileId = JsonPath.from(addRemoteFile.asString()).getString("data.files[0].dataFile.id");
Response getfileMetadata = UtilIT.getFileData(fileId, apiToken);
getfileMetadata.prettyPrint();
getfileMetadata.then().assertThat().statusCode(200);

// String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier");
String keyInDataverse = storageIdentifier.split(":")[2];
Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier);

String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse;
String s3Object = s3localstack.getObjectAsString(BUCKET_NAME, keyInS3);
System.out.println("s3Object: " + s3Object);

// assertEquals(contentsOfFile.trim(), s3Object.trim());
assertEquals(contentsOfFile, s3Object);

Response deleteFile = UtilIT.deleteFileApi(Integer.parseInt(fileId), apiToken);
deleteFile.prettyPrint();
deleteFile.then().assertThat().statusCode(200);

AmazonS3Exception expectedException = null;
try {
s3localstack.getObjectAsString(BUCKET_NAME, keyInS3);
} catch (AmazonS3Exception ex) {
expectedException = ex;
}
Expand Down Expand Up @@ -235,4 +380,26 @@ static Response setStorageDriver(String dvAlias, String label, String apiToken)
.put("/api/admin/dataverse/" + dvAlias + "/storageDriver");
}

static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) {
String idInPath = idOrPersistentIdOfDataset; // Assume it's a number.
String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) {
idInPath = ":persistentId";
optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset;
}
RequestSpecification requestSpecification = given();
if (apiToken != null) {
requestSpecification = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken);
}
return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam);
}

static Response uploadFileDirect(String url, InputStream inputStream) {
return given()
.header("x-amz-tagging", "dv-state=temp")
.body(inputStream)
.put(url);
}

}

0 comments on commit b624206

Please sign in to comment.