diff --git a/doc/release-notes/7900-add-multipleFilesMetadata-dataset.md b/doc/release-notes/7900-add-multipleFilesMetadata-dataset.md
new file mode 100644
index 00000000000..13b77259c69
--- /dev/null
+++ b/doc/release-notes/7900-add-multipleFilesMetadata-dataset.md
@@ -0,0 +1,4 @@
+### Direct Upload API Now Available for Adding Metadata for Multiple Files to a Dataset
+
+Users can now add metadata for multiple files to a dataset in a single call to the direct upload API, once the files exist in the S3 bucket.
+For more information, see the [Direct DataFile Upload/Replace API section](https://guides.dataverse.org/en/5.6/developers/s3-direct-upload-api.html) of the Dataverse Software Guides.
diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 07550fb98ed..823efe05669
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -2116,6 +2116,49 @@ The fully expanded example above (without environment variables) looks like this
 Note: The ``id`` returned in the json response is the id of the file metadata version.
+
+
+Adding File Metadata
+~~~~~~~~~~~~~~~~~~~~
+
+This API call requires a ``jsonString`` expressing the metadata of multiple files. It adds database records for files that have already been copied to storage, e.g. via the direct upload API.
+
+Each object in the jsonData array includes values for:
+
+* "description" - A description of the file
+* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset
+* "storageIdentifier" - String, the identifier under which the file was stored
+* "fileName" - String
+* "mimeType" - String
+* "fixity/checksum" either:
+
+  * "md5Hash" - String with MD5 hash value, or
+  * "checksum" - JSON object with a "@type" field specifying the algorithm used and a "@value" field with the value from that algorithm, both Strings
+
+.. note:: See :ref:`curl-examples-and-environment-variables` if you are unfamiliar with the use of ``export`` below.
+
+A curl example using a ``PERSISTENT_IDENTIFIER``:
+
+* ``SERVER_URL`` - e.g. https://demo.dataverse.org
+* ``API_TOKEN`` - API endpoints require an API token that can be passed as the X-Dataverse-key HTTP header. For more details, see the :doc:`auth` section.
+* ``PERSISTENT_IDENTIFIER`` - Example: ``doi:10.5072/FK2/7U7YBV``
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV
+  export JSON_DATA='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},
+  {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]'
+
+  curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/datasets/:persistentId/addFiles?persistentId=doi:10.5072/FK2/7U7YBV" -F jsonData='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}}, {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]'
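+
+The response is a JSON object reporting the outcome for each file plus an overall tally. An abbreviated success response, as built by the implementation in this change (the full ``fileDetails`` objects are elided here), looks like this:
+
+.. code-block:: bash
+
+  {
+    "status": "OK",
+    "data": {
+      "Files": [
+        {
+          "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42",
+          "successMessage": "Added successfully to the dataset",
+          "fileDetails": {...}
+        },
+        {
+          "storageIdentifier": "s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53",
+          "successMessage": "Added successfully to the dataset",
+          "fileDetails": {...}
+        }
+      ],
+      "Result": {
+        "Total number of files": 2,
+        "Number of files successfully added": 2
+      }
+    }
+  }
+
+Files that fail validation are reported with an "errorMessage" (or a "warningMessage" for duplicates) in place of "successMessage".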
 
 Updating File Metadata
 ~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
index 9f2386facb1..d1a71c313ca
--- a/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
+++ b/doc/sphinx-guides/source/developers/s3-direct-upload-api.rst
@@ -7,7 +7,7 @@ Direct upload involves a series of three activities, each involving interacting
 * Requesting initiation of a transfer from the server
 * Use of the pre-signed URL(s) returned in that call to perform an upload/multipart-upload of the file to S3
-* A call to the server to register the file as part of the dataset/replace a file in the dataset or to cancel the transfer
+* A call to the server to register the file (or files) as part of the dataset, replace a file in the dataset, or cancel the transfer
 
 This API is only enabled when a Dataset is configured with a data store supporting direct S3 upload. Administrators should be aware that partial transfers, where a client starts uploading the file/parts of the file and does not contact the server to complete/cancel the transfer, will result in data stored in S3 that is not referenced in the Dataverse installation (e.g. should be considered temporary and deleted.)
 
@@ -116,6 +116,38 @@ The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.Data
 
 Note that this API call can be used independently of the others, e.g. supporting use cases in which the file already exists in S3/has been uploaded via some out-of-band method. With current S3 stores the object identifier must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifer must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
 
+Adding Multiple Uploaded Files to the Dataset
+---------------------------------------------
+
+Once the files exist in the S3 bucket, a final API call is needed to add them all to the dataset. In this call, additional metadata is supplied for each file using the "jsonData" parameter.
+jsonData normally includes information such as a file description, tags, provenance, whether the file is restricted, etc.
+For direct uploads, the jsonData object must also include values for:
+
+* "description" - A description of the file
+* "directoryLabel" - The "File Path" of the file, indicating which folder the file should be uploaded to within the dataset
+* "storageIdentifier" - String, the identifier under which the file was stored
+* "fileName" - String
+* "mimeType" - String
+* "fixity/checksum" either:
+
+  * "md5Hash" - String with MD5 hash value, or
+  * "checksum" - JSON object with a "@type" field specifying the algorithm used and a "@value" field with the value from that algorithm, both Strings
+
+The allowed checksum algorithms are defined by the edu.harvard.iq.dataverse.DataFile.CheckSumType class and currently include MD5, SHA-1, SHA-256, and SHA-512.
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/7U7YBV
+  export JSON_DATA='[{"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42", "fileName":"file1.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123456"}},
+  {"description":"My description.","directoryLabel":"data/subdir1","categories":["Data"], "restrict":"false", "storageIdentifier":"s3://demo-dataverse-bucket:176e28068b0-1c3f80357d53", "fileName":"file2.txt", "mimeType":"text/plain", "checksum": {"@type": "SHA-1", "@value": "123789"}}]'
+
+  curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/:persistentId/addFiles?persistentId=$PERSISTENT_IDENTIFIER" -F "jsonData=$JSON_DATA"
+
+Note that this API call can be used independently of the others, e.g. supporting use cases in which the files already exist in S3/have been uploaded via some out-of-band method.
+With current S3 stores the object identifiers must be in the correct bucket for the store, include the PID authority/identifier of the parent dataset, and be guaranteed unique, and the supplied storage identifiers must be prefaced with the store identifier used in the Dataverse installation, as with the internally generated examples above.
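+
+The examples above use curl, but any HTTP client will do. Below is a minimal, illustrative Java sketch using the JDK's built-in ``java.net.http`` client; the multipart body is assembled by hand, and the token, server URL, PID, and metadata values are the same placeholders as above:
+
+.. code-block:: java
+
+  import java.net.URI;
+  import java.net.http.HttpClient;
+  import java.net.http.HttpRequest;
+  import java.net.http.HttpResponse;
+
+  public class AddFilesClient {
+      public static void main(String[] args) throws Exception {
+          String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+          String serverUrl = "https://demo.dataverse.org";
+          String pid = "doi:10.5072/FK2/7U7YBV";
+          String jsonData = "[{\"description\":\"My description.\","
+                  + "\"storageIdentifier\":\"s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42\","
+                  + "\"fileName\":\"file1.txt\",\"mimeType\":\"text/plain\","
+                  + "\"checksum\":{\"@type\":\"SHA-1\",\"@value\":\"123456\"}}]";
+
+          // Hand-rolled multipart/form-data body with a single "jsonData" field
+          String boundary = "----dvboundary" + System.currentTimeMillis();
+          String body = "--" + boundary + "\r\n"
+                  + "Content-Disposition: form-data; name=\"jsonData\"\r\n\r\n"
+                  + jsonData + "\r\n"
+                  + "--" + boundary + "--\r\n";
+
+          HttpRequest request = HttpRequest.newBuilder()
+                  .uri(URI.create(serverUrl + "/api/datasets/:persistentId/addFiles?persistentId=" + pid))
+                  .header("X-Dataverse-key", apiToken)
+                  .header("Content-Type", "multipart/form-data; boundary=" + boundary)
+                  .POST(HttpRequest.BodyPublishers.ofString(body))
+                  .build();
+
+          HttpResponse<String> response = HttpClient.newHttpClient()
+                  .send(request, HttpResponse.BodyHandlers.ofString());
+          System.out.println(response.statusCode() + " " + response.body());
+      }
+  }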
+
+
 Replacing an existing file in the Dataset
 -----------------------------------------
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 645b3cc8355..0404746b694
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -2532,4 +2532,73 @@ public Response getTimestamps(@PathParam("identifier") String id) {
             return wr.getResponse();
         }
     }
+
+
+    /**
+     * Add multiple files, already present in storage, to an existing Dataset.
+     *
+     * @param idSupplied the dataset database id, or {@code :persistentId} plus a persistentId query parameter
+     * @param jsonData JSON array with one metadata object per file, as described in the guides
+     * @return per-file results and an overall tally
+     */
+    @POST
+    @Path("{id}/addFiles")
+    @Consumes(MediaType.MULTIPART_FORM_DATA)
+    public Response addFilesToDataset(@PathParam("id") String idSupplied,
+            @FormDataParam("jsonData") String jsonData) {
+
+        if (!systemConfig.isHTTPUpload()) {
+            return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled"));
+        }
+
+        // -------------------------------------
+        // (1) Get the user from the API key
+        // -------------------------------------
+        User authUser;
+        try {
+            authUser = findUserOrDie();
+        } catch (WrappedResponse ex) {
+            return error(Response.Status.FORBIDDEN, BundleUtil.getStringFromBundle("file.addreplace.error.auth"));
+        }
+
+        // -------------------------------------
+        // (2) Get the Dataset
+        // -------------------------------------
+        Dataset dataset;
+        try {
+            dataset = findDatasetOrDie(idSupplied);
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+
+        // -------------------------------------
+        // (2a) Make sure the dataset does not have a package file
+        // -------------------------------------
+        for (DatasetVersion dv : dataset.getVersions()) {
+            if (dv.isHasPackageFile()) {
+                return error(Response.Status.FORBIDDEN,
+                        BundleUtil.getStringFromBundle("file.api.alreadyHasPackageFile"));
+            }
+        }
+
+        DataverseRequest dvRequest = createDataverseRequest(authUser);
+
+        AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper(
+                dvRequest,
+                this.ingestService,
+                this.datasetService,
+                this.fileService,
+                this.permissionSvc,
+                this.commandEngine,
+                this.systemConfig
+        );
+
+        return addFileHelper.addFiles(jsonData, dataset, authUser);
+    }
 }
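
For orientation, this is a sketch of building the jsonData payload that addFilesToDataset() expects, using the same javax.json API the server-side code uses to parse it. The class name is illustrative; the field values are the placeholders from the docs above:

    import javax.json.Json;
    import javax.json.JsonArray;

    public class AddFilesPayloadDemo {
        public static void main(String[] args) {
            JsonArray jsonData = Json.createArrayBuilder()
                    .add(Json.createObjectBuilder()
                            .add("description", "My description.")
                            .add("directoryLabel", "data/subdir1")
                            .add("storageIdentifier", "s3://demo-dataverse-bucket:176e28068b0-1c3f80357c42")
                            .add("fileName", "file1.txt")
                            .add("mimeType", "text/plain")
                            .add("checksum", Json.createObjectBuilder()
                                    .add("@type", "SHA-1")
                                    .add("@value", "123456")))
                    .build();
            // This string is the value of the multipart "jsonData" form field.
            System.out.println(jsonData.toString());
        }
    }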
diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
index d01e9b4e2f3..1fcc355ae6b
--- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java
@@ -9,12 +9,15 @@ import edu.harvard.iq.dataverse.DataFile.ChecksumType;
 import edu.harvard.iq.dataverse.DataFileServiceBean;
 import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetLock;
 import edu.harvard.iq.dataverse.DatasetServiceBean;
 import edu.harvard.iq.dataverse.DatasetVersion;
 import edu.harvard.iq.dataverse.EjbDataverseEngine;
 import edu.harvard.iq.dataverse.FileMetadata;
 import edu.harvard.iq.dataverse.PermissionServiceBean;
 import edu.harvard.iq.dataverse.api.Util;
+import edu.harvard.iq.dataverse.api.Files;
+import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
 import edu.harvard.iq.dataverse.authorization.users.User;
 import edu.harvard.iq.dataverse.engine.command.Command;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
@@ -31,6 +34,7 @@ import edu.harvard.iq.dataverse.util.json.JsonPrinter;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -42,13 +46,25 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.ejb.EJBException;
+import javax.json.Json;
+import javax.json.JsonArrayBuilder;
+import javax.json.JsonObject;
+import javax.json.JsonArray;
 import javax.json.JsonObjectBuilder;
+import javax.json.JsonReader;
 import javax.validation.ConstraintViolation;
+import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
+
+import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.io.IOUtils;
 import org.ocpsoft.common.util.Strings;
+import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_ERROR;
+import static edu.harvard.iq.dataverse.api.AbstractApiBean.STATUS_OK;
+import static javax.ws.rs.core.Response.Status.BAD_REQUEST;
+
 /**
- * Methods to add or replace a single file.
+ * Methods to add or replace a single file, or to add the metadata of multiple
+ * files that already exist in storage.
  *
@@ -100,7 +116,7 @@ public class AddReplaceFileHelper{
     public static String FILE_ADD_OPERATION = "FILE_ADD_OPERATION";
     public static String FILE_REPLACE_OPERATION = "FILE_REPLACE_OPERATION";
     public static String FILE_REPLACE_FORCE_OPERATION = "FILE_REPLACE_FORCE_OPERATION";
-
+    public static String MULTIPLEFILES_ADD_OPERATION = "MULTIPLEFILES_ADD_OPERATION";
 
     private String currentOperation;
 
@@ -299,33 +315,49 @@ public AddReplaceFileHelper(DataverseRequest dvRequest,
         this.user = dvRequest.getUser();
     }
 
     /**
-     *
+     * Add a single file to the dataset (the original single-file entry point).
+     *
      * @param chosenDataset
      * @param newFileName
      * @param newFileContentType
+     * @param newStorageIdentifier
      * @param newFileInputStream
      * @param optionalFileParams
-     * @return
+     * @return true on success, false if an error was recorded
      */
-    public boolean runAddFileByDataset(Dataset chosenDataset,
-            String newFileName,
-            String newFileContentType,
-            String newStorageIdentifier,
-            InputStream newFileInputStream,
-            OptionalFileParams optionalFileParams){
-
+    public boolean runAddFileByDataset(Dataset chosenDataset,
+            String newFileName,
+            String newFileContentType,
+            String newStorageIdentifier,
+            InputStream newFileInputStream,
+            OptionalFileParams optionalFileParams){
+        return this.runAddFileByDataset(chosenDataset, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams, false);
+    }
+
+    /**
+     * As above, with a flag selecting the multiple-files mode, in which the
+     * dataset-version update and ingest are deferred to a single batch pass.
+     */
+    public boolean runAddFileByDataset(Dataset chosenDataset,
+            String newFileName,
+            String newFileContentType,
+            String newStorageIdentifier,
+            InputStream newFileInputStream,
+            OptionalFileParams optionalFileParams,
+            boolean multipleFiles) {
+
+        msgt(">> runAddFileByDataset");
 
         initErrorHandling();
 
-        this.currentOperation = FILE_ADD_OPERATION;
+        this.currentOperation = multipleFiles ? MULTIPLEFILES_ADD_OPERATION : FILE_ADD_OPERATION;
 
         if (!this.step_001_loadDataset(chosenDataset)){
             return false;
        }
 
         //return this.runAddFile(this.dataset, newFileName, newFileContentType, newFileInputStream, optionalFileParams);
         return this.runAddReplaceFile(dataset, newFileName, newFileContentType, newStorageIdentifier, newFileInputStream, optionalFileParams);
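
As an orientation aid, here is a minimal sketch (class and method names illustrative) of how the multi-file path drives the new overload; the real loop lives in addFiles() further below:

    import edu.harvard.iq.dataverse.Dataset;
    import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper;
    import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams;

    class MultiFileAddSketch {
        /** One iteration of the addFiles() loop, reduced to its essence. */
        static boolean addOne(AddReplaceFileHelper helper, Dataset dataset,
                              OptionalFileParams params, String storageId,
                              String fileName, String mimeType) {
            return helper.runAddFileByDataset(dataset, fileName, mimeType, storageId,
                    null,    // no InputStream: the file bytes are already in S3
                    params,
                    true);   // multipleFiles: skip per-file version update and ingest
        }
    }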
@@ -727,8 +759,10 @@ private boolean runAddReplacePhase2(){
         }else{
             msgt("step_070_run_update_dataset_command");
-            if (!this.step_070_run_update_dataset_command()){
-                return false;
+            // In multiple-files mode the dataset version is updated once, in
+            // addFiles(), rather than per file.
+            if (!this.isMultipleFilesAddOperation()) {
+                if (!this.step_070_run_update_dataset_command()) {
+                    return false;
+                }
             }
         }
 
@@ -791,6 +825,16 @@ public boolean isFileAddOperation(){
         return this.currentOperation.equals(FILE_ADD_OPERATION);
     }
 
+    /**
+     * Is this a multiple-files add operation?
+     *
+     * @return true if the current operation is MULTIPLEFILES_ADD_OPERATION
+     */
+    public boolean isMultipleFilesAddOperation(){
+        return this.currentOperation.equals(MULTIPLEFILES_ADD_OPERATION);
+    }
+
     /**
      * Initialize error handling vars
      */
@@ -1864,14 +1908,13 @@ private boolean step_100_startIngestJobs(){
         //if (true){
             //return true;
         //}
-
-        msg("pre ingest start");
-        // start the ingest!
-        //
-
-        ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser());
-
-        msg("post ingest start");
+
+        // In multiple-files mode, ingest is started once for the whole batch in addFiles().
+        if (!this.isMultipleFilesAddOperation()) {
+            msg("pre ingest start");
+            // start the ingest!
+            ingestService.startIngestJobsForDataset(dataset, dvRequest.getAuthenticatedUser());
+            msg("post ingest start");
+        }
 
         return true;
     }
@@ -1959,6 +2002,160 @@ public String getDuplicateFileWarning() {
     public void setDuplicateFileWarning(String duplicateFileWarning) {
         this.duplicateFileWarning = duplicateFileWarning;
     }
+
+    /**
+     * Add the files described in jsonData (already present in storage) to the
+     * dataset, then run a single version update and start ingest for the batch.
+     */
+    public Response addFiles(String jsonData, Dataset dataset, User authUser) {
+        msgt("(addFilesToDataset) jsonData: " + jsonData);
+
+        JsonArrayBuilder jarr = Json.createArrayBuilder();
+
+        JsonArray filesJson = null;
+
+        int totalNumberofFiles = 0;
+        int successNumberofFiles = 0;
+        // -----------------------------------------------------------
+        // Parse the array of file entries from jsonData
+        // -----------------------------------------------------------
+        try (StringReader rdr = new StringReader(jsonData);
+                JsonReader dbJsonReader = Json.createReader(rdr)) {
+            filesJson = dbJsonReader.readArray();
+
+            if (filesJson != null) {
+                totalNumberofFiles = filesJson.getValuesAs(JsonObject.class).size();
+
+                for (JsonObject fileJson : filesJson.getValuesAs(JsonObject.class)) {
+
+                    OptionalFileParams optionalFileParams = null;
+                    try {
+                        optionalFileParams = new OptionalFileParams(fileJson.toString());
+
+                        // storageIdentifier, fileName, and mimeType are all required
+                        if (optionalFileParams.hasStorageIdentifier()
+                                && optionalFileParams.hasFileName()
+                                && optionalFileParams.hasMimetype()) {
+                            String newStorageIdentifier = optionalFileParams.getStorageIdentifier();
+                            String newFilename = optionalFileParams.getFileName();
+                            String newFileContentType = optionalFileParams.getMimeType();
+
+                            msgt("ADD! = " + newFilename);
+
+                            runAddFileByDataset(dataset,
+                                    newFilename,
+                                    newFileContentType,
+                                    newStorageIdentifier,
+                                    null,
+                                    optionalFileParams,
+                                    true);
+                            if (hasError()) {
+                                JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                        .add("storageIdentifier", newStorageIdentifier)
+                                        .add("errorMessage", getHttpErrorCode().toString() + ": " + getErrorMessagesAsString("\n"))
+                                        .add("fileDetails", fileJson);
+                                jarr.add(fileoutput);
+                            } else {
+                                JsonObject successresult = getSuccessResultAsJsonObjectBuilder().build();
+                                String duplicateWarning = getDuplicateFileWarning();
+
+                                if (duplicateWarning != null && !duplicateWarning.isEmpty()) {
+                                    JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                            .add("storageIdentifier", newStorageIdentifier)
+                                            .add("warningMessage", duplicateWarning)
+                                            .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0));
+                                    jarr.add(fileoutput);
+                                } else {
+                                    JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                            .add("storageIdentifier", newStorageIdentifier)
+                                            .add("successMessage", "Added successfully to the dataset")
+                                            .add("fileDetails", successresult.getJsonArray("files").getJsonObject(0));
+                                    jarr.add(fileoutput);
+                                }
+                                // only count files that were actually added
+                                successNumberofFiles = successNumberofFiles + 1;
+                            }
+                        } else {
+                            JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                    .add("errorMessage", "You must provide a storageIdentifier, fileName, and mimeType.")
+                                    .add("fileDetails", fileJson);
+                            jarr.add(fileoutput);
+                        }
+
+                    } catch (DataFileTagException ex) {
+                        Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex);
+                        JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode())
+                                .add("message", ex.getMessage())
+                                .add("fileDetails", fileJson);
+                        jarr.add(fileoutput);
+
+                    } catch (NoFilesException ex) {
+                        Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex);
+                        JsonObjectBuilder fileoutput = Json.createObjectBuilder()
+                                .add("errorCode", Response.Status.BAD_REQUEST.getStatusCode())
+                                .add("message", "NoFileException! Serious Error! See administrator!")
+                                .add("fileDetails", fileJson);
+                        jarr.add(fileoutput);
+                    }
+
+                } // End of adding files
+
+                DatasetLock eipLock = dataset.getLockFor(DatasetLock.Reason.EditInProgress);
+                if (eipLock == null) {
+                    logger.log(Level.WARNING, "Dataset not locked for EditInProgress");
+                } else {
+                    datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress);
+                    logger.log(Level.INFO, "Removed EditInProgress lock");
+                }
+
+                // Single dataset-version update for the whole batch; the per-file calls
+                // above skipped step_070 because multipleFiles was true.
+                try {
+                    UpdateDatasetVersionCommand cmd = new UpdateDatasetVersionCommand(dataset, dvRequest);
+                    cmd.setValidateLenient(true);
+                    commandEngine.submit(cmd);
+                } catch (CommandException ex) {
+                    return error(Response.Status.INTERNAL_SERVER_ERROR, "CommandException updating DatasetVersion from addFiles job: " + ex.getMessage());
+                }
+
+                dataset = datasetService.find(dataset.getId());
+
+                // Start ingest once for the whole batch (also deferred per file above)
+                ingestService.startIngestJobsForDataset(dataset, (AuthenticatedUser) authUser);
+
+            }
+        } catch (javax.json.stream.JsonParsingException ex) {
+            logger.log(Level.SEVERE, "Json parsing exception in addFiles", ex);
+            return error(BAD_REQUEST, "Json Parsing Exception: " + ex.getMessage());
+        } catch (Exception e) {
+            logger.log(Level.SEVERE, "Exception in addFiles", e);
+            return error(BAD_REQUEST, e.getMessage());
+        }
+
+        JsonObjectBuilder result = Json.createObjectBuilder()
+                .add("Total number of files", totalNumberofFiles)
+                .add("Number of files successfully added", successNumberofFiles);
+
+        return Response.ok().entity(Json.createObjectBuilder()
+                .add("status", STATUS_OK)
+                .add("data", Json.createObjectBuilder()
+                        .add("Files", jarr)
+                        .add("Result", result))
+                .build()).build();
+    }
+
+    protected static Response error(Response.Status sts, String msg) {
+        return Response.status(sts)
+                .entity(NullSafeJsonBuilder.jsonObjectBuilder()
+                        .add("status", STATUS_ERROR)
+                        .add("message", msg).build()
+                ).type(MediaType.APPLICATION_JSON_TYPE).build();
+    }
 } // end class
 /*
diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
index 13d4ed96815..9484a412913
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
@@ -57,7 +57,7 @@ public class IngestUtil {
      *
      * @param version the dataset version
      * @param newFiles the list of new data files to add to it
-     * @param fileToReplace
+     * @param fileToReplace the file being replaced, or null when files are only being added
      */
     public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List<DataFile> newFiles, DataFile fileToReplace) {
@@ -257,7 +257,7 @@ public static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMetadata replacedFmd) {
         // #6942 added proxy for existing files to a boolean set when dataset version copy is done
         for (Iterator<FileMetadata> fmIt = version.getFileMetadatas().iterator(); fmIt.hasNext();) {
             FileMetadata fm = fmIt.next();
-            if((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd==null) || (!fm.getDataFile().equals(replacedFmd.getDataFile()))) {
+            if ((fm.isInPriorVersion() || fm.getId() != null) && (replacedFmd == null || !fm.getDataFile().equals(replacedFmd.getDataFile()))) {
                 String existingName = fm.getLabel();
                 String existingDir = fm.getDirectoryLabel();
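
The IngestUtil change above is a pure operator-precedence fix: in Java, && binds tighter than ||, so the old expression evaluated its right-hand side even when replacedFmd was null, which matters when addFiles() checks for duplicate names with no file being replaced. A tiny self-contained demonstration of the trap (names are illustrative stand-ins, not Dataverse classes):

    public class PrecedenceDemo {
        public static void main(String[] args) {
            boolean inPrior = false;   // stands in for fm.isInPriorVersion() || fm.getId() != null
            String replacedFmd = null; // stands in for the replaced FileMetadata (null when only adding)

            // Old grouping: (inPrior && replacedFmd == null) || !replacedFmd.isEmpty()
            // With inPrior == false the left side is false, so the right side runs and NPEs.
            try {
                boolean old = (inPrior && replacedFmd == null) || !replacedFmd.isEmpty();
                System.out.println("old = " + old);
            } catch (NullPointerException npe) {
                System.out.println("old grouping: NPE, right operand ran despite null");
            }

            // Fixed grouping: the null check guards the dereference and short-circuits.
            boolean fixed = inPrior && (replacedFmd == null || !replacedFmd.isEmpty());
            System.out.println("fixed = " + fixed); // false, no NPE
        }
    }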