DCM: allow rsync script download via API IQSS#3145
pdurbin committed Jun 17, 2016
1 parent 0ad7dbe commit 3a517ce
Showing 4 changed files with 116 additions and 18 deletions.
30 changes: 30 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -27,6 +27,7 @@
import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;
import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand;
import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand;
import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
@@ -562,4 +563,33 @@ public Response createAssignment(String userOrGroup, @PathParam("identifier") St
}
}

@GET
@Path("{identifier}/dataCaptureModule/rsync")
public Response getRsync(@PathParam("identifier") String id) {
try {
Dataset dataset = findDatasetOrDie(id);
/**
* @todo This logic really doesn't belong here but for now the Data
* Capture Module will blindly create an rsync script for *any*
* dataset, regardless of whether the dataset has been configured to
* support rsync.
*/
for (DatasetField datasetField : dataset.getLatestVersion().getDatasetFields()) {
/**
* @todo What should the trigger be for kicking off the
* RequestRsyncScriptCommand? For now we're looking for the
* presence of the "dataType" field, which is way too coarse.
* This is copied from CreateDatasetCommand.
*/
if ("dataType".equals(datasetField.getDatasetFieldType().getName())) {
JsonObjectBuilder jab = execCommand(new RequestRsyncScriptCommand(createDataverseRequest(findUserOrDie()), dataset));
return okResponse(jab);
}
}
} catch (WrappedResponse ex) {
return ex.getResponse();
}
return errorResponse(Response.Status.NOT_FOUND, "An rsync script was not found for dataset id " + id);
}

}
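
For anyone trying the new endpoint outside the test suite, a minimal client-side sketch follows. The Dataverse host, the numeric dataset id, and the API token are placeholders, and passing the token in the X-Dataverse-key header is an assumption based on the UtilIT.API_TOKEN_HTTP_HEADER usage in the integration tests below; the response envelope shape mirrors what DatasetsIT asserts (data.datasetId and data.script).

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class RsyncScriptClient {

    public static void main(String[] args) throws Exception {
        // Assumptions: Dataverse on localhost:8080, a numeric dataset id, and a
        // placeholder API token sent in the X-Dataverse-key header.
        String baseUrl = "http://localhost:8080";
        long datasetId = 42;
        String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
        URL url = new URL(baseUrl + "/api/datasets/" + datasetId + "/dataCaptureModule/rsync");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("GET");
        conn.setRequestProperty("X-Dataverse-key", apiToken);
        System.out.println("HTTP " + conn.getResponseCode());
        // On success the body is a JSON envelope like
        // {"status":"OK","data":{"datasetId":42,"script":"#!..."}}
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}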
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetCommand.java
@@ -4,8 +4,6 @@
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersionUser;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.RoleAssignment;
@@ -14,7 +12,6 @@
import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
@@ -28,7 +25,6 @@
import java.util.Iterator;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.validation.ConstraintViolation;
@@ -229,7 +225,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
*/
if ("dataType".equals(datasetField.getDatasetFieldType().getName())) {
try {
ctxt.engine().submit(new RequestRsyncScriptCommand(getRequest(), savedDataset, datasetField));
ctxt.engine().submit(new RequestRsyncScriptCommand(getRequest(), savedDataset));
} catch (CommandException | RuntimeException ex) {
logger.info("Attempt to request rsync script failed: " + ex.getLocalizedMessage());
}
src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java
@@ -1,17 +1,19 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import java.util.Collections;
import java.util.logging.Logger;
import javax.json.Json;
@@ -23,25 +25,27 @@
* RuntimeException because otherwise ctxt.engine().submit() will put "OK" for
* "actiontype" in the actionlogrecord rather than "InternalError" if you throw
* a CommandExecutionException.
*
* @todo Who is responsible for knowing when it's appropriate to create an rsync
* script for a dataset, Dataverse or the Data Capture Module? For now the DCM
* will always create an rsync script, which may not be what we want.
*/
@RequiredPermissions(Permission.AddDataset)
public class RequestRsyncScriptCommand extends AbstractVoidCommand {
public class RequestRsyncScriptCommand extends AbstractCommand<JsonObjectBuilder> {

private static final Logger logger = Logger.getLogger(RequestRsyncScriptCommand.class.getCanonicalName());

private final Dataset dataset;
private final DatasetField datasetField;
private final DataverseRequest request;

RequestRsyncScriptCommand(DataverseRequest requestArg, Dataset datasetArg, DatasetField datasetFieldArg) {
public RequestRsyncScriptCommand(DataverseRequest requestArg, Dataset datasetArg) {
super(requestArg, datasetArg);
request = requestArg;
dataset = datasetArg;
datasetField = datasetFieldArg;
}

@Override
protected void executeImpl(CommandContext ctxt) throws PermissionException, RuntimeException {
public JsonObjectBuilder execute(CommandContext ctxt) throws CommandException {
// {"dep_email": "bob.smith@example.com", "uid": 42, "depositor_name": ["Smith", "Bob"], "lab_email": "john.doe@example.com", "datacite.resourcetype": "X-Ray Diffraction"}
User user = request.getUser();
if (!(user instanceof AuthenticatedUser)) {
@@ -102,16 +106,22 @@ protected void executeImpl(CommandContext ctxt) throws PermissionException, Runt
if (statusCode != 200) {
throw new RuntimeException(errorPreamble + "Rather than 200 the status code was " + statusCode + ". The body was \'" + response.getBody() + "\'.");
}
/**
* @todo What happens when no datasetId is in the JSON?
*/
long datasetId = response.getBody().getObject().getLong("datasetId");
String script = response.getBody().getObject().getString("script");
if (script == null || script.isEmpty()) {
throw new RuntimeException(errorPreamble + "The script was null or empty.");
}
/**
* @todo Put this in the database somewhere. Will I be able to query the
* DCM at any time and GET the script again, based on an id?
* @todo Put the script in the database somewhere.
*/
logger.info("script for dataset " + datasetId + ": " + script);
logger.fine("script for dataset " + datasetId + ": " + script);
NullSafeJsonBuilder nullSafeJsonBuilder = jsonObjectBuilder()
.add("datasetId", datasetId)
.add("script", script);
return nullSafeJsonBuilder;
}

}
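
One way to address the @todo above about a DCM response that carries no datasetId would be a small guard before the values are read. The helper below is only a sketch and is not part of this commit; it takes the org.json.JSONObject that Unirest's response.getBody().getObject() returns.

import org.json.JSONObject;

class DcmResponseGuard {

    /**
     * Hypothetical guard (not in this commit): fail fast when the Data Capture
     * Module response is missing the fields RequestRsyncScriptCommand reads.
     */
    static void requireDatasetIdAndScript(JSONObject body, String errorPreamble) {
        if (body == null || !body.has("datasetId")) {
            throw new RuntimeException(errorPreamble + "No 'datasetId' was found in the response.");
        }
        String script = body.optString("script", "");
        if (script.isEmpty()) {
            throw new RuntimeException(errorPreamble + "The script was null or empty.");
        }
    }
}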
68 changes: 65 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
Expand Up @@ -14,6 +14,7 @@
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.hamcrest.Matchers.startsWith;

public class DatasetsIT {

@@ -48,10 +49,11 @@ public void testCreateDataset() {

/**
* In order for this test to pass you must have the Data Capture Module
* running: https://github.com/sbgrid/data-capture-module
* running:
* https://github.com/sbgrid/data-capture-module/blob/master/api/dcm.py
*
* Configure it to avoid the error "The ':DataCaptureModuleUrl' setting has
* not been configured."
* Configure :DataCaptureModuleUrl to point at wherever you are running the
* DCM.
*/
@Test
public void testCreateDatasetWithDcmDependency() {
@@ -89,6 +91,64 @@ public void testCreateDatasetWithDcmDependency() {
assertEquals("X-Ray Diffraction", dataTypeField.get(0).get("value"));
assertTrue(dataTypeField.get(0).get("multiple").equals(false));

/**
* @todo Also test user who doesn't have permission.
*/
Response getRsyncScriptPermErrorGuest = given()
.get("/api/datasets/" + datasetId + "/dataCaptureModule/rsync");
getRsyncScriptPermErrorGuest.prettyPrint();
getRsyncScriptPermErrorGuest.then().assertThat()
.statusCode(401)
.body("message", equalTo("User :guest is not permitted to perform requested action."));

Response createNoPermsUser = UtilIT.createRandomUser();
String noPermsUsername = UtilIT.getUsernameFromResponse(createNoPermsUser);
String noPermsApiToken = UtilIT.getApiTokenFromResponse(createNoPermsUser);

Response getRsyncScriptPermErrorNonGuest = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, noPermsApiToken)
.get("/api/datasets/" + datasetId + "/dataCaptureModule/rsync");
getRsyncScriptPermErrorNonGuest.then().assertThat()
.statusCode(401)
.body("message", equalTo("User @" + noPermsUsername + " is not permitted to perform requested action."));

Response getRsyncScript = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken)
.get("/api/datasets/" + datasetId + "/dataCaptureModule/rsync");
getRsyncScript.prettyPrint();
getRsyncScript.then().assertThat()
.statusCode(200)
.body("data.datasetId", equalTo(datasetId))
.body("data.script", startsWith("#!"));

Response attemptToGetRsyncScriptForNonRsyncDataset = given()
.header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken1)
.get("/api/datasets/" + datasetId1 + "/dataCaptureModule/rsync");
attemptToGetRsyncScriptForNonRsyncDataset.prettyPrint();
attemptToGetRsyncScriptForNonRsyncDataset.then().assertThat()
.statusCode(404)
.body("message", equalTo("An rsync script was not found for dataset id " + datasetId1));

/**
* Here we are pretending to be the Data Capture Module reporting whether
* checksum validation succeeded or failed. Don't notify the user yet
* (too chatty). This should kick off crawling of the files so they are
* imported into Dataverse. Once the crawling and importing is complete,
* notify the user.
*
* @todo What authentication should be used here? The API token of the
* user? (If so, pass the token in the initial upload request payload.)
* Or should Dataverse be able to be configured so that it only will
* receive these messages from trusted IP addresses? Should there be a
* shared secret that's used for *all* requests from the Data Capture
* Module to Dataverse?
*/
// Response uploadSuccessful = given()
// .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken)
// // send json here: { "userId":"$bar", "datasetId" : "$foo" , "status" : "$status"}
// // status either "validation passed" or "validation failed"
// .post("/api/dataCaptureModule/checksumValidation");
// uploadSuccessful.prettyPrint();
Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
deleteDatasetResponse.prettyPrint();
assertEquals(200, deleteDatasetResponse.getStatusCode());
@@ -101,6 +161,8 @@ public void testCreateDatasetWithDcmDependency() {
deleteUserResponse.prettyPrint();
assertEquals(200, deleteUserResponse.getStatusCode());

UtilIT.deleteUser(noPermsUsername);

}

@Test
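
The test comment above asks you to point :DataCaptureModuleUrl at a running DCM before executing testCreateDatasetWithDcmDependency. A possible setup step is sketched below; the admin settings endpoint, the Dataverse host, and the DCM URL (localhost:8888) are assumptions rather than anything introduced by this commit.

import com.jayway.restassured.RestAssured;
import com.jayway.restassured.response.Response;

public class DcmSettingSetup {

    public static void main(String[] args) {
        // Assumed hosts: Dataverse on localhost:8080, the DCM on localhost:8888.
        RestAssured.baseURI = "http://localhost:8080";
        Response setDcmUrl = RestAssured.given()
                .body("http://localhost:8888")
                .put("/api/admin/settings/:DataCaptureModuleUrl");
        System.out.println("HTTP " + setDcmUrl.getStatusCode());
    }
}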
