Skip to content

Commit

Permalink
Merge pull request #8532 from lubitchv/8525-injest-optional-skip
Browse files Browse the repository at this point in the history
8525 ingest optional skip
  • Loading branch information
kcondon authored Apr 12, 2022
2 parents d4bc102 + 66b774b commit 9094b94
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 21 deletions.
1 change: 1 addition & 0 deletions doc/release-notes/8525-ingest-optional-skip.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Tabular ingest can now be skipped when adding a file via the API, by passing `"tabIngest":"false"` in the `jsonData` parameter.
5 changes: 3 additions & 2 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ When adding a file to a dataset, you can optionally specify the following:
- A description of the file.
- The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset.
- Whether or not the file is restricted.
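- Whether or not the file should go through :doc:`tabular ingest </user/tabulardataingest/index>`. Set the ``tabIngest`` parameter to ``false`` to skip ingest; if it is not specified, it defaults to ``true`` (the file is ingested when it is in a supported tabular format).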
Note that when a Dataverse instance is configured to use S3 storage with direct upload enabled, there is API support to send a file directly to S3. This is more complex and is described in the :doc:`/developers/s3-direct-upload-api` guide.
Expand All @@ -1315,13 +1316,13 @@ In the curl example below, all of the above are specified but they are optional.
export SERVER_URL=https://demo.dataverse.org
export PERSISTENT_ID=doi:10.5072/FK2/J8SJZB
curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID"
curl -H X-Dataverse-key:$API_TOKEN -X POST -F "file=@$FILENAME" -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "$SERVER_URL/api/datasets/:persistentId/add?persistentId=$PERSISTENT_ID"
The fully expanded example above (without environment variables) looks like this:
.. code-block:: bash
curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB"
curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST -F file=@data.tsv -F 'jsonData={"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "tabIngest":"false"}' "https://demo.dataverse.org/api/datasets/:persistentId/add?persistentId=doi:10.5072/FK2/J8SJZB"
You should expect a 201 ("CREATED") response and JSON indicating the database id that has been assigned to your newly uploaded file.
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -3592,7 +3592,7 @@ public String save() {
// have been created in the dataset.
dataset = datasetService.find(dataset.getId());

List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null);
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(dataset.getEditVersion(), newFiles, null, true);
newFiles.clear();

// and another update command:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ public String save() {
}

// Try to save the NEW files permanently:
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null);
List<DataFile> filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true);

// reset the working list of fileMetadatas, as to only include the ones
// that have been added to the version successfully:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au
throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to add file(s) to dataset: " + violation.getMessage() + " The invalid value was \"" + violation.getInvalidValue() + "\".");
} else {

ingestService.saveAndAddFilesToDataset(editVersion, dataFiles, null);
ingestService.saveAndAddFilesToDataset(editVersion, dataFiles, null, true);

}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -501,9 +501,11 @@ private boolean runAddReplaceFile(Dataset owner,
if (!phase1Success){
return false;
}


return runAddReplacePhase2();
boolean tabIngest = true;
if (optionalFileParams != null) {
tabIngest = optionalFileParams.getTabIngest();
}
return runAddReplacePhase2(tabIngest);

}

Expand Down Expand Up @@ -653,7 +655,7 @@ private boolean runAddReplacePhase1(Dataset owner,


public boolean runReplaceFromUI_Phase2(){
return runAddReplacePhase2();
return runAddReplacePhase2(true);
}


Expand Down Expand Up @@ -744,7 +746,7 @@ public boolean updateLabelDescriptionRestrictedFromUI(String label, String descr
*
* @return
*/
private boolean runAddReplacePhase2(){
private boolean runAddReplacePhase2(boolean tabIngest){

if (this.hasError()){
return false; // possible to have errors already...
Expand All @@ -756,7 +758,7 @@ private boolean runAddReplacePhase2(){
}

msgt("step_060_addFilesViaIngestService");
if (!this.step_060_addFilesViaIngestService()){
if (!this.step_060_addFilesViaIngestService(tabIngest)){
return false;

}
Expand Down Expand Up @@ -1570,7 +1572,7 @@ private boolean step_055_loadOptionalFileParams(OptionalFileParams optionalFileP
return true;
}

private boolean step_060_addFilesViaIngestService(){
private boolean step_060_addFilesViaIngestService(boolean tabIngest){

if (this.hasError()){
return false;
Expand All @@ -1583,7 +1585,7 @@ private boolean step_060_addFilesViaIngestService(){
}

int nFiles = finalFileList.size();
finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace);
finalFileList = ingestService.saveAndAddFilesToDataset(workingVersion, finalFileList, fileToReplace, tabIngest);

if (nFiles != finalFileList.size()) {
if (nFiles == 1) {
Expand Down Expand Up @@ -2244,4 +2246,4 @@ public String getFileName()
1) Recovery from adding same file and duplicate being found
- draft ok
- published version - nope
*/
*/
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ public class OptionalFileParams {

private boolean restrict = false;
public static final String RESTRICT_ATTR_NAME = "restrict";

private boolean tabIngest = true;
public static final String TAB_INGEST_ATTR_NAME = "tabIngest";

private String storageIdentifier;
public static final String STORAGE_IDENTIFIER_ATTR_NAME = "storageIdentifier";
Expand Down Expand Up @@ -173,7 +176,15 @@ public void setRestriction(boolean restrict){
/**
 * Returns the current value of the restriction flag.
 *
 * @return the stored {@code restrict} flag
 */
public boolean getRestriction() {
    return restrict;
}


/**
 * Sets the flag that requests (or suppresses) tabular ingest for the file.
 *
 * @param tabIngest {@code false} to request that tabular ingest be skipped,
 *                  {@code true} to leave it enabled
 */
public void setTabIngest(boolean tabIngest) {
this.tabIngest = tabIngest;
}

/**
 * Returns the current value of the tabular-ingest flag.
 *
 * @return the stored {@code tabIngest} flag; {@code false} means the caller
 *         asked for tabular ingest to be skipped
 */
public boolean getTabIngest() {
    return tabIngest;
}

public boolean hasCategories(){
if ((categories == null)||(this.categories.isEmpty())){
return false;
Expand Down Expand Up @@ -347,6 +358,14 @@ private void loadParamsFromJson(String jsonData) throws DataFileTagException{

this.restrict = Boolean.valueOf(jsonObj.get(RESTRICT_ATTR_NAME).getAsString());
}

// -------------------------------
// get tabIngest as boolean
// -------------------------------
if ((jsonObj.has(TAB_INGEST_ATTR_NAME)) && (!jsonObj.get(TAB_INGEST_ATTR_NAME).isJsonNull())){

this.tabIngest = Boolean.valueOf(jsonObj.get(TAB_INGEST_ATTR_NAME).getAsString());
}

// -------------------------------
// get storage identifier as string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,11 @@ public class IngestServiceBean {
// attached to the Dataset via some cascade path (for example, via
// DataFileCategory objects, if any were already assigned to the files).
// It must be called before we attempt to permanently save the files in
// the database by calling the Save command on the dataset and/or version.
public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version, List<DataFile> newFiles, DataFile fileToReplace) {
// the database by calling the Save command on the dataset and/or version.
public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version,
List<DataFile> newFiles,
DataFile fileToReplace,
boolean tabIngest) {
List<DataFile> ret = new ArrayList<>();

if (newFiles != null && newFiles.size() > 0) {
Expand Down Expand Up @@ -327,7 +330,7 @@ public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version, List<Data
String fileName = fileMetadata.getLabel();

boolean metadataExtracted = false;
if (FileUtil.canIngestAsTabular(dataFile)) {
if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) {
/*
* Note that we don't try to ingest the file right away - instead we mark it as
* "scheduled for ingest", then at the end of the save process it will be queued
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -2586,5 +2586,7 @@ public void testFilesUnchangedAfterDatasetMetadataUpdate() throws IOException {
.body("data.latestVersion.files[0].directoryLabel", equalTo("code"));

}



}
60 changes: 57 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import com.jayway.restassured.RestAssured;
import com.jayway.restassured.response.Response;
import java.util.logging.Logger;
import org.junit.BeforeClass;

import org.junit.Test;
import org.junit.BeforeClass;
import com.jayway.restassured.path.json.JsonPath;
import com.jayway.restassured.path.xml.XmlPath;
import static edu.harvard.iq.dataverse.api.AccessIT.apiToken;
Expand Down Expand Up @@ -39,11 +40,10 @@
import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.CoreMatchers.nullValue;
import org.hamcrest.Matchers;
import org.junit.AfterClass;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.junit.Ignore;

public class FilesIT {

Expand Down Expand Up @@ -1784,4 +1784,58 @@ public void testRange() throws IOException {

}

/**
 * Uploads two SPSS (.sav) files to a fresh dataset through the native API:
 * the first with {@code "tabIngest":"false"}, the second without the
 * parameter. Verifies that the first keeps its original label (it was not
 * ingested) while the second ends up with a {@code .tab} label (it was).
 */
@Test
public void testAddFileToDatasetSkipTabIngest() throws IOException, InterruptedException {

    // --- set up: user, dataverse collection, dataset ---
    Response userResponse = UtilIT.createRandomUser();
    assertEquals(200, userResponse.getStatusCode());
    String userName = UtilIT.getUsernameFromResponse(userResponse);
    String token = UtilIT.getApiTokenFromResponse(userResponse);

    Response dataverseResponse = UtilIT.createRandomDataverse(token);
    assertEquals(201, dataverseResponse.getStatusCode());
    String alias = UtilIT.getAliasFromResponse(dataverseResponse);

    Response datasetResponse = UtilIT.createRandomDatasetViaNativeApi(alias, token);
    assertEquals(201, datasetResponse.getStatusCode());
    Integer datasetId = JsonPath.from(datasetResponse.body().asString()).getInt("data.id");

    // --- upload with tabIngest=false: the file must keep its .sav label ---
    String skippedFilePath = "src/test/resources/sav/dct.sav";
    String skipIngestJson = "{\"description\":\"My description.\",\"directoryLabel\":\"data/subdir1\",\"categories\":[\"Data\"], \"restrict\":\"false\", \"tabIngest\":\"false\"}";
    Response skippedUpload = UtilIT.uploadFileViaNative(datasetId.toString(), skippedFilePath, skipIngestJson, token);
    logger.info(skippedUpload.prettyPrint());
    assertEquals(200, skippedUpload.getStatusCode());

    // Wait for any ingest lock to clear before inspecting the file metadata.
    assertTrue("Failed test if Ingest Lock exceeds max duration " + skippedFilePath,
            UtilIT.sleepForLock(datasetId, "Ingest", token, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

    Long skippedFileId = JsonPath.from(skippedUpload.body().asString()).getLong("data.files[0].dataFile.id");
    Response skippedFileMeta = UtilIT.getDataFileMetadataDraft(skippedFileId, token);
    assertEquals("dct.sav", JsonPath.from(skippedFileMeta.body().asString()).getString("label"));

    // --- upload without tabIngest: the file must be ingested and renamed to .tab ---
    String ingestedFilePath = "src/test/resources/sav/frequency-test.sav";
    String defaultIngestJson = "{\"description\":\"My description.\",\"directoryLabel\":\"data/subdir1\",\"categories\":[\"Data\"], \"restrict\":\"false\" }";
    Response ingestedUpload = UtilIT.uploadFileViaNative(datasetId.toString(), ingestedFilePath, defaultIngestJson, token);
    logger.info(ingestedUpload.prettyPrint());
    assertEquals(200, ingestedUpload.getStatusCode());

    assertTrue("Failed test if Ingest Lock exceeds max duration " + ingestedFilePath,
            UtilIT.sleepForLock(datasetId, "Ingest", token, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

    Long ingestedFileId = JsonPath.from(ingestedUpload.body().asString()).getLong("data.files[0].dataFile.id");
    Response ingestedFileMeta = UtilIT.getDataFileMetadataDraft(ingestedFileId, token);
    assertEquals("frequency-test.tab", JsonPath.from(ingestedFileMeta.body().asString()).getString("label"));

    // --- clean up: dataset, dataverse collection, user ---
    Response destroyDataset = UtilIT.destroyDataset(datasetId, token);
    assertEquals(200, destroyDataset.getStatusCode());

    Response deleteDataverse = UtilIT.deleteDataverse(alias, token);
    assertEquals(200, deleteDataverse.getStatusCode());

    Response deleteUser = UtilIT.deleteUser(userName);
    assertEquals(200, deleteUser.getStatusCode());

}

}

0 comments on commit 9094b94

Please sign in to comment.