From abb72e771afb1b663fbc138587a16099c2051a04 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 14 Jul 2020 14:58:17 -0400 Subject: [PATCH 1/6] add API to download all files by dataset #4529 --- doc/release-notes/4529-download-by-dataset.md | 6 + doc/sphinx-guides/source/api/dataaccess.rst | 67 ++++ .../source/api/getting-started.rst | 4 +- .../edu/harvard/iq/dataverse/api/Access.java | 74 ++++- .../harvard/iq/dataverse/api/Datasets.java | 4 +- src/main/java/propertyFiles/Bundle.properties | 1 + .../iq/dataverse/api/DownloadFilesIT.java | 293 ++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 24 ++ 8 files changed, 467 insertions(+), 6 deletions(-) create mode 100644 doc/release-notes/4529-download-by-dataset.md create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java diff --git a/doc/release-notes/4529-download-by-dataset.md b/doc/release-notes/4529-download-by-dataset.md new file mode 100644 index 00000000000..22227e149f0 --- /dev/null +++ b/doc/release-notes/4529-download-by-dataset.md @@ -0,0 +1,6 @@ +In previous versions of Dataverse, downloading all files from a dataset via API was a two-step process: + +- Find the database ids of all the files. +- Download all the files, using those ids (comma-separated). + +Now you can download all files from a dataset (assuming you have access to them) via API by passing the dataset persistent ID (PID such as DOI or Handle) or the dataset's database id. Versions are also supported: as with the "download metadata" API, you can pass :draft, :latest, :latest-published, or numbers (1.1, 2.0). diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index a3fbdfd73f4..52b1fff8eef 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -7,6 +7,73 @@ More advanced features of the Access API include format-specific transformations .. contents:: |toctitle| :local: +.. 
_download-all-api: + +Downloading All Files in a Dataset +---------------------------------- + +The "download all" API downloads as many files as possible from a dataset as a zipped bundle. + +There are a number of reasons why not all of the files can be downloaded: + +- Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files). +- The Dataverse installation has limited how large the zip bundle can be. + +In the curl example below, the flags ``-O`` and ``-J`` are used. When there are no errors, this has the effect of saving the file as "dataverse_files.zip" (just like the web interface). The flags force errors to be downloaded as a file. + +Please note that in addition to the files from the dataset, an additional file called "MANIFEST.TXT" will be included in the zipped bundle. It has additional information about the files. + +There are two forms of the ``downloadAll`` API, a basic form and one that supports dataset versions. + +Basic Download All +~~~~~~~~~~~~~~~~~~ + +The basic form downloads files from the latest accessible version of the dataset. If you are not using an API token, this means the most recently published version. If you are using an API token with full access to the dataset, this means the draft version or the most recently published version if no draft exists. + +A curl example using a DOI (no version): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ + + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/downloadAll/:persistentId/?persistentId=$PERSISTENT_ID + +The fully expanded example above (without environment variables) looks like this: + +.. 
code-block:: bash + + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/downloadAll/:persistentId/?persistentId=doi:10.70122/FK2/N2XGBJ + +Download All By Version +~~~~~~~~~~~~~~~~~~~~~~~ + +The second form of the ``downloadAll`` API allows you to specify which version you'd like to download files from. As with the ``datasets`` API endpoints described in the :doc:`native-api` section, the following identifiers can be used. + +* ``:draft`` the draft version, if any +* ``:latest`` either a draft (if exists) or the latest published version. +* ``:latest-published`` the latest published version +* ``x.y`` a specific version, where ``x`` is the major version number and ``y`` is the minor version number. +* ``x`` same as ``x.0`` + +A curl example using a DOI (with version): + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ + export VERSION=2.0 + + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/downloadAll/:persistentId/versions/$VERSION?persistentId=$PERSISTENT_ID + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/downloadAll/:persistentId/versions/2.0?persistentId=doi:10.70122/FK2/N2XGBJ + Basic File Access ----------------- diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index 3c9bfd9b137..b9239d45ef4 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -106,7 +106,9 @@ Downloading Files The :doc:`dataaccess` section explains how to download files. 
-In order to download files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`. +To download all the files in a dataset, see :ref:`download-all-api`. + +In order to download individual files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`. Downloading Metadata ~~~~~~~~~~~~~~~~~~~~ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index ea2d512f98f..0835cff859d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -28,6 +28,7 @@ import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.UserNotificationServiceBean; import static edu.harvard.iq.dataverse.api.AbstractApiBean.error; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -44,10 +45,16 @@ import edu.harvard.iq.dataverse.dataaccess.StoredOriginalFile; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; +import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; +import 
edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.RequestAccessCommand; import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; @@ -541,12 +548,73 @@ public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") bool return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } + + @Path("downloadAll/{id}") + @GET + @Produces({"application/zip"}) + public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); + String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); + return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + } catch (WrappedResponse ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + } + + @Path("downloadAll/{id}/versions/{versionId}") + @GET + @Produces({"application/zip"}) + public Response downloadAllFromVersion(@PathParam("id") String datasetIdOrPersistentId, @PathParam("versionId") String versionId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse 
response) throws WebApplicationException { + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + final Dataset ds = execCommand(new GetDatasetCommand(req, findDatasetOrDie(datasetIdOrPersistentId))); + DatasetVersion dsv = execCommand(handleVersion(versionId, new Datasets.DsVersionHandler>() { + + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds); + } + + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds); + } + })); + if (dsv == null) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.exception.version.not.found")); + } + String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); + return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); + } catch (WrappedResponse ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + } + + private static String getFileIdsAsCommaSeparated(List fileMetadatas) { + List ids = new ArrayList<>(); + for (FileMetadata fileMetadata : fileMetadatas) { + Long fileId = fileMetadata.getDataFile().getId(); + ids.add(String.valueOf(fileId)); + } + return String.join(",", ids); + } + /* * API method for downloading zipped bundles of multiple files: */ - - // TODO: Rather than only supporting looking up files by their database IDs, - // consider supporting persistent identifiers. 
@Path("datafiles/{fileIds}") @GET @Produces({"application/zip"}) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 628db853e5c..61844ab6b9c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -219,7 +219,7 @@ public class Datasets extends AbstractApiBean { * Used to consolidate the way we parse and handle dataset versions. * @param */ - private interface DsVersionHandler { + public interface DsVersionHandler { T handleLatest(); T handleDraft(); T handleSpecific( long major, long minor ); @@ -1684,7 +1684,7 @@ private void msgt(String m){ } - private T handleVersion( String versionId, DsVersionHandler hdl ) + public static T handleVersion( String versionId, DsVersionHandler hdl ) throws WrappedResponse { switch (versionId) { case ":latest": return hdl.handleLatest(); diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 97d40798485..fc0695aa00d 100755 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2329,6 +2329,7 @@ access.api.requestList.fileNotFound=Could not find datafile with id {0}. access.api.requestList.noKey=You must provide a key to get list of access requests for a file. access.api.requestList.noRequestsFound=There are no access requests for this file {0}. access.api.exception.metadata.not.available.for.nontabular.file=This type of metadata is only available for tabular files. +access.api.exception.version.not.found=Could not find requested dataset version. 
#permission permission.AddDataverse.label=AddDataverse diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java new file mode 100644 index 00000000000..6fa58b85bff --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -0,0 +1,293 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.path.json.JsonPath; +import com.jayway.restassured.response.Response; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.HashSet; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import static javax.ws.rs.core.Response.Status.BAD_REQUEST; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class DownloadFilesIT { + + @BeforeClass + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + /** + * This test is focused on downloading all files by their version. All files + * are public. 
+ */ + @Test + public void downloadAllFilesByVersion() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + Path pathtoReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); + Files.write(pathtoReadme, "In the beginning...".getBytes()); + + Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoReadme.toString(), apiToken); + uploadReadme.prettyPrint(); + uploadReadme.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("README.md")); + + Path pathtoContribFile = Paths.get(Files.createTempDirectory(null) + File.separator + "CONTRIBUTING.md"); + Files.write(pathtoContribFile, "Patches welcome!".getBytes()); + + Response uploadContribFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoContribFile.toString(), apiToken); + uploadContribFile.prettyPrint(); + uploadContribFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("CONTRIBUTING.md")); + + // The creator gets the files from the draft. 
+ Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound1 = gatherFilenames(downloadFiles1.getBody().asInputStream()); + + // Note that a MANIFEST.TXT file is added. + HashSet expectedFiles1 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles1, filenamesFound1); + + // A guest user can't download unpublished files. + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles2.prettyPrint(); + downloadFiles2.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Publishing version 1.0. + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Now a guest user can download files (published now) + Response downloadFiles3 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles3.then().assertThat() + .statusCode(OK.getStatusCode()); + + Path pathtoLicenseFile = Paths.get(Files.createTempDirectory(null) + File.separator + "LICENSE.md"); + Files.write(pathtoLicenseFile, "Apache".getBytes()); + + Response uploadLicenseFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoLicenseFile.toString(), apiToken); + uploadLicenseFile.prettyPrint(); + uploadLicenseFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("LICENSE.md")); + + Response downloadFiles4 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles4.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound2 = gatherFilenames(downloadFiles4.getBody().asInputStream()); + + // The creator gets the draft version with an extra file. 
+ HashSet expectedFiles2 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles2, filenamesFound2); + + Response downloadFiles5 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles5.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound3 = gatherFilenames(downloadFiles5.getBody().asInputStream()); + + // A guest user gets the 1.0 version with only 3 files. + HashSet expectedFiles3 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles3, filenamesFound3); + + // Publishing version 2.0 + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + Response downloadFiles6 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles6.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound4 = gatherFilenames(downloadFiles6.getBody().asInputStream()); + + // By not specifying a version, the creator gets the latest version. In this case, 2.0 (published) with 4 files. + HashSet expectedFiles4 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles4, filenamesFound4); + + String datasetVersion = "1.0"; + Response downloadFiles7 = UtilIT.downloadFiles(datasetPid, datasetVersion, apiToken); + downloadFiles7.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound5 = gatherFilenames(downloadFiles7.getBody().asInputStream()); + + // Creator specifies the 1.0 version and gets the expected 3 files. 
+ HashSet expectedFiles5 = new HashSet<>(Arrays.asList("MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles5, filenamesFound5); + + // Add Code of Conduct file + Path pathtoCocFile = Paths.get(Files.createTempDirectory(null) + File.separator + "CODE_OF_CONDUCT.md"); + Files.write(pathtoCocFile, "Be excellent to each other.".getBytes()); + + // This Code of Conduct file will be in version 3.0 once it's published. For now it's a draft. + Response uploadCocFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathtoCocFile.toString(), apiToken); + uploadCocFile.prettyPrint(); + uploadCocFile.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("CODE_OF_CONDUCT.md")); + + Response downloadFiles8 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles8.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound6 = gatherFilenames(downloadFiles8.getBody().asInputStream()); + + // If the creator doesn't specify a version, they get the latest draft with 5 files. + HashSet expectedFiles6 = new HashSet<>(Arrays.asList("CODE_OF_CONDUCT.md", "LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles6, filenamesFound6); + + String datasetVersionLatestPublished = ":latest-published"; + Response downloadFiles9 = UtilIT.downloadFiles(datasetPid, datasetVersionLatestPublished, apiToken); + downloadFiles9.then().assertThat() + .statusCode(OK.getStatusCode()); + + HashSet filenamesFound7 = gatherFilenames(downloadFiles9.getBody().asInputStream()); + + // The creator requested "latest published" and got version 2.0 with 4 files (the draft CODE_OF_CONDUCT.md is excluded). + HashSet expectedFiles7 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); + Assert.assertEquals(expectedFiles7, filenamesFound7); + + } + + /** + * This test is focused on downloading all files from a dataset that + * contains a restricted file. 
+ */ + @Test + public void downloadAllFilesRestricted() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + Path pathToSecrets = Paths.get(Files.createTempDirectory(null) + File.separator + "secrets.md"); + Files.write(pathToSecrets, "The Nobel Prize will be mine.".getBytes()); + + Response uploadSecrets = UtilIT.uploadFileViaNative(datasetId.toString(), pathToSecrets.toString(), apiToken); + uploadSecrets.prettyPrint(); + uploadSecrets.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("secrets.md")); + + Long origFileId = JsonPath.from(uploadSecrets.body().asString()).getLong("data.files[0].dataFile.id"); + + Response restrictResponse = UtilIT.restrictFile(origFileId.toString(), true, apiToken); + restrictResponse.prettyPrint(); + restrictResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("File secrets.md restricted.")); + + Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download a 
restricted file from a draft. + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); + + Path pathToReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); + Files.write(pathToReadme, "My findings.".getBytes()); + + Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathToReadme.toString(), apiToken); + uploadReadme.prettyPrint(); + uploadReadme.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("README.md")); + + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles2.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download a restricted file and an unrestricted file from a draft. + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); + + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Now a guest user can download files (published now) + Response downloadFiles3 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles3.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The guest can only get the unrestricted file (and the manifest). 
+ Assert.assertEquals(new HashSet<>(Arrays.asList("README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles3.getBody().asInputStream())); + + } + + private HashSet gatherFilenames(InputStream inputStream) throws IOException { + HashSet filenamesFound = new HashSet<>(); + try (ZipInputStream zipStream = new ZipInputStream(inputStream)) { + ZipEntry entry = null; + while ((entry = zipStream.getNextEntry()) != null) { + String entryName = entry.getName(); + filenamesFound.add(entryName); + zipStream.closeEntry(); + } + } + return filenamesFound; + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ab9b9e05073..728d10714b5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -750,6 +750,30 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) { return given().get(getString + "?format=original&key=" + apiToken); } + static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) { + String datasetVersion = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, apiToken); + } + + static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String apiToken) { + String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
+ if (!NumberUtils.isNumber(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + String optionalVersion = ""; + if (datasetVersion != null) { + optionalVersion = "/versions/" + datasetVersion; + } + return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam); + } + static Response subset(String fileId, String variables, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) From 44cc8156ce0e523aaaf2ccec881073fad6729671 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Jul 2020 16:05:55 -0400 Subject: [PATCH 2/6] add docs and tests for format=original #4529 --- doc/sphinx-guides/source/api/dataaccess.rst | 2 + .../iq/dataverse/api/DownloadFilesIT.java | 56 +++++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 26 ++++++++- 3 files changed, 81 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index 52b1fff8eef..c3ea96dcea8 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -14,6 +14,8 @@ Downloading All Files in a Dataset The "download all" API downloads as many files as possible from a dataset as a zipped bundle. +By default, tabular files are downloaded in their "archival" form (tab-separated values). To download the original files (Stata, for example), add ``format=original`` as a query parameter. + There are a number of reasons why not all of the files can be downloaded: - Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files). 
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java index 6fa58b85bff..5b05fbcfac3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -18,6 +18,7 @@ import static javax.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import org.junit.Assert; +import static org.junit.Assert.assertTrue; import org.junit.BeforeClass; import org.junit.Test; @@ -277,6 +278,61 @@ public void downloadAllFilesRestricted() throws IOException { } + /** + * This test is focused on downloading all files when tabular files are + * present (original vs archival). + */ + @Test + public void downloadAllFilesTabular() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDataset.prettyPrint(); + createDataset.then().assertThat() + .statusCode(CREATED.getStatusCode()); + + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset); + + String pathToFile = "scripts/search/data/tabular/50by1000.dta"; + + Response uploadTabular = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadTabular.prettyPrint(); + uploadTabular.then().assertThat() + 
.statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("50by1000.dta")); + + assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + + Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles1.then().assertThat() + .statusCode(OK.getStatusCode()); + + // By default we get the archival version (.tab). + Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.tab", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); + + String format = "original"; + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, format, apiToken); + downloadFiles2.then().assertThat() + .statusCode(OK.getStatusCode()); + + // By passing format=original we get the original version, Stata (.dta) in this case. + Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.dta", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); + } + private HashSet gatherFilenames(InputStream inputStream) throws IOException { HashSet filenamesFound = new HashSet<>(); try (ZipInputStream zipStream = new ZipInputStream(inputStream)) { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 728d10714b5..08f56462111 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -752,10 +752,22 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) { static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) { String datasetVersion = null; - return downloadFiles(datasetIdOrPersistentId, datasetVersion, apiToken); + String format = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); } - static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String 
apiToken) { + /** + * @param format can be "original" for tabular files. + */ + static Response downloadFiles(String datasetIdOrPersistentId, String format, String apiToken) { + String datasetVersion = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); + } + + /** + * @param format can be "original" for tabular files. + */ + static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String format, String apiToken) { String idInPath = datasetIdOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. if (!NumberUtils.isNumber(datasetIdOrPersistentId)) { @@ -771,7 +783,15 @@ static Response downloadFiles(String datasetIdOrPersistentId, String datasetVers if (datasetVersion != null) { optionalVersion = "/versions/" + datasetVersion; } - return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam); + String optionalFormat = ""; + if (format != null) { + if (!"".equals(optionalQueryParam)) { + optionalFormat = "&format=" + format; + } else { + optionalFormat = "?format=" + format; + } + } + return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat); } static Response subset(String fileId, String variables, String apiToken) { From 30bb828dcce27468dbad56be401272bea2832e9c Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 15 Jul 2020 16:09:47 -0400 Subject: [PATCH 3/6] add DownloadFilesIT to API test suite #4529 --- conf/docker-aio/run-test-suite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/docker-aio/run-test-suite.sh b/conf/docker-aio/run-test-suite.sh index 988250efada..9cbc90a152a 100755 --- a/conf/docker-aio/run-test-suite.sh +++ b/conf/docker-aio/run-test-suite.sh @@ -8,4 +8,4 @@ fi # Please note the "dataverse.test.baseurl" is set to run for "all-in-one" Docker environment. 
# TODO: Rather than hard-coding the list of "IT" classes here, add a profile to pom.xml. -mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT -Ddataverse.test.baseurl=$dvurl +mvn test -Dtest=DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT -Ddataverse.test.baseurl=$dvurl From 05e55c2f02773155ae5d4cccb25bcaaadd583cf8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Jul 2020 12:03:43 -0400 Subject: [PATCH 4/6] resolve naming conflict between test methods #4529 Added enum so we don't have two methods both with 3 String args. --- .../iq/dataverse/api/DownloadFilesIT.java | 3 +-- .../edu/harvard/iq/dataverse/api/UtilIT.java | 21 +++++++++++-------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java index 5b05fbcfac3..13389b8bf10 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -324,8 +324,7 @@ public void downloadAllFilesTabular() throws IOException { // By default we get the archival version (.tab). 
Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.tab", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); - String format = "original"; - Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, format, apiToken); + Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, UtilIT.DownloadFormat.original, apiToken); downloadFiles2.then().assertThat() .statusCode(OK.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 08f56462111..c467d9c242c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -750,24 +750,27 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) { return given().get(getString + "?format=original&key=" + apiToken); } + public enum DownloadFormat { + original + } + static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) { String datasetVersion = null; - String format = null; + DownloadFormat format = null; return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); } - /** - * @param format can be "original" for tabular files. - */ - static Response downloadFiles(String datasetIdOrPersistentId, String format, String apiToken) { + static Response downloadFiles(String datasetIdOrPersistentId, DownloadFormat format, String apiToken) { String datasetVersion = null; return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); } - /** - * @param format can be "original" for tabular files. 
- */ - static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String format, String apiToken) { + static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String apiToken) { + DownloadFormat format = null; + return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken); + } + + static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, DownloadFormat format, String apiToken) { String idInPath = datasetIdOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. if (!NumberUtils.isNumber(datasetIdOrPersistentId)) { From 4da877baa5a7d9374dd9388d71d389e6ed6e68fe Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 17 Jul 2020 12:25:31 -0400 Subject: [PATCH 5/6] rename "downloadAll" to "dataset" (Data Access API) #4529 --- doc/sphinx-guides/source/api/dataaccess.rst | 24 +++++++++---------- .../source/api/getting-started.rst | 2 +- .../edu/harvard/iq/dataverse/api/Access.java | 4 ++-- .../edu/harvard/iq/dataverse/api/UtilIT.java | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst index c3ea96dcea8..690445852b9 100755 --- a/doc/sphinx-guides/source/api/dataaccess.rst +++ b/doc/sphinx-guides/source/api/dataaccess.rst @@ -7,12 +7,12 @@ More advanced features of the Access API include format-specific transformations .. contents:: |toctitle| :local: -.. _download-all-api: +.. _download-by-dataset-api: Downloading All Files in a Dataset ---------------------------------- -The "download all" API downloads as many files as possible from a dataset as a zipped bundle. +The "download by dataset" API downloads as many files as possible from a dataset as a zipped bundle. By default, tabular files are downloaded in their "archival" form (tab-separated values). 
To download the original files (Stata, for example), add ``format=original`` as a query parameter. @@ -25,10 +25,10 @@ In the curl example below, the flags ``-O`` and ``J`` are used. When there are n Please note that in addition to the files from dataset, an additional file call "MANIFEST.TXT" will be included in the zipped bundle. It has additional information about the files. -There are two forms of the ``downloadAll`` API, a basic form and one that supports dataset versions. +There are two forms of the "download by dataset" API, a basic form and one that supports dataset versions. -Basic Download All -~~~~~~~~~~~~~~~~~~ +Basic Download By Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~ The basic form downloads files from the latest accessible version of the dataset. If you are not using an API token, this means the most recently published version. If you are using an API token with full access to the dataset, this means the draft version or the most recently published version if no draft exists. @@ -40,18 +40,18 @@ A curl example using a DOI (no version): export SERVER_URL=https://demo.dataverse.org export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ - curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/downloadAll/:persistentId/?persistentId=$PERSISTENT_ID + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/?persistentId=$PERSISTENT_ID The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/downloadAll/:persistentId/?persistentId=doi:10.70122/FK2/N2XGBJ + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/?persistentId=doi:10.70122/FK2/N2XGBJ -Download All By Version -~~~~~~~~~~~~~~~~~~~~~~~ +Download By Dataset By Version +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The second form of the ``downloadAll`` API allows you to specify which version you'd like to download files from. As with the ``datasets`` API endpoints described in the :doc:`native-api` section, the following identifiers can be used. +The second form of the "download by dataset" API allows you to specify which version you'd like to download files from. As with the ``datasets`` API endpoints described in the :doc:`native-api` section, the following identifiers can be used. * ``:draft`` the draft version, if any * ``:latest`` either a draft (if exists) or the latest published version. @@ -68,13 +68,13 @@ A curl example using a DOI (with version): export PERSISTENT_ID=doi:10.70122/FK2/N2XGBJ export VERSION=2.0 - curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/downloadAll/:persistentId/versions/$VERSION?persistentId=$PERSISTENT_ID + curl -O -J -H "X-Dataverse-key:$API_TOKEN" $SERVER_URL/api/access/dataset/:persistentId/versions/$VERSION?persistentId=$PERSISTENT_ID The fully expanded example above (without environment variables) looks like this: .. 
code-block:: bash - curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/downloadAll/:persistentId/versions/2.0?persistentId=doi:10.70122/FK2/N2XGBJ + curl -O -J -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/access/dataset/:persistentId/versions/2.0?persistentId=doi:10.70122/FK2/N2XGBJ Basic File Access ----------------- diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index b9239d45ef4..1c96a0af542 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -106,7 +106,7 @@ Downloading Files The :doc:`dataaccess` section explains how to download files. -To download all the files in a dataset, see :ref:`download-all-api`. +To download all the files in a dataset, see :ref:`download-by-dataset-api`. In order to download individual files, you must know their database IDs which you can get from the ``dataverse_json`` metadata at the dataset level. See :ref:`export-dataset-metadata-api`. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 0835cff859d..4e7904c324d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -549,7 +549,7 @@ public Response postDownloadDatafiles(String fileIds, @QueryParam("gbrecs") bool return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); } - @Path("downloadAll/{id}") + @Path("dataset/{id}") @GET @Produces({"application/zip"}) public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersistentId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { @@ -564,7 +564,7 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist } } - @Path("downloadAll/{id}/versions/{versionId}") + @Path("dataset/{id}/versions/{versionId}") @GET @Produces({"application/zip"}) public Response downloadAllFromVersion(@PathParam("id") String datasetIdOrPersistentId, @PathParam("versionId") String versionId, @QueryParam("gbrecs") boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index c467d9c242c..4f8351d3beb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -794,7 +794,7 @@ static Response downloadFiles(String datasetIdOrPersistentId, String datasetVers optionalFormat = "?format=" + format; } } - return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat); + return 
requestSpecification.get("/api/access/dataset/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat); } static Response subset(String fileId, String variables, String apiToken) { From fc7df593d7077e068bbcfdf5d61793664577c246 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Jul 2020 14:44:23 -0400 Subject: [PATCH 6/6] improve error handling #4529 Return UNAUTHORIZED instead of BAD_REQUEST and detailed error messages. --- .../edu/harvard/iq/dataverse/api/Access.java | 8 +- .../iq/dataverse/api/DownloadFilesIT.java | 73 ++++++++++++++----- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 4e7904c324d..e5bcc9e7f3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -559,8 +559,8 @@ public Response downloadAllFromLatest(@PathParam("id") String datasetIdOrPersist final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); String fileIds = getFileIdsAsCommaSeparated(latest.getFileMetadatas()); return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); - } catch (WrappedResponse ex) { - return error(BAD_REQUEST, ex.getLocalizedMessage()); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } @@ -598,8 +598,8 @@ public Command handleLatestPublished() { } String fileIds = getFileIdsAsCommaSeparated(dsv.getFileMetadatas()); return downloadDatafiles(fileIds, gbrecs, apiTokenParam, uriInfo, headers, response); - } catch (WrappedResponse ex) { - return error(BAD_REQUEST, ex.getLocalizedMessage()); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java index 13389b8bf10..4f4c34c1c8b 100644 --- 
a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java @@ -13,9 +13,10 @@ import java.util.HashSet; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.FORBIDDEN; import static javax.ws.rs.core.Response.Status.OK; +import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; import static org.hamcrest.CoreMatchers.equalTo; import org.junit.Assert; import static org.junit.Assert.assertTrue; @@ -91,7 +92,9 @@ public void downloadAllFilesByVersion() throws IOException { Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, null); downloadFiles2.prettyPrint(); downloadFiles2.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()); + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("User :guest is not permitted to perform requested action.")); UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) .then().assertThat().statusCode(OK.getStatusCode()); @@ -191,11 +194,27 @@ public void downloadAllFilesByVersion() throws IOException { HashSet expectedFiles7 = new HashSet<>(Arrays.asList("LICENSE.md", "MANIFEST.TXT", "README.md", "CONTRIBUTING.md")); Assert.assertEquals(expectedFiles7, filenamesFound7); + // Guests cannot download draft versions. + String datasetVersionDraft = ":draft"; + Response downloadFiles10 = UtilIT.downloadFiles(datasetPid, datasetVersionDraft, null); + downloadFiles10.prettyPrint(); + downloadFiles10.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("User :guest is not permitted to perform requested action.")); + + // Users are told about bad API tokens. 
+ Response downloadFiles11 = UtilIT.downloadFiles(datasetPid, "junkToken"); + downloadFiles11.prettyPrint(); + downloadFiles11.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Bad api key ")); + } /** - * This test is focused on downloading all files by their version. All files - * are public. + * This test is focused on downloading all files that are restricted. */ @Test public void downloadAllFilesRestricted() throws IOException { @@ -246,21 +265,12 @@ public void downloadAllFilesRestricted() throws IOException { // The creator can download a restricted file from a draft. Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream())); - Path pathToReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); - Files.write(pathToReadme, "My findings.".getBytes()); - - Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathToReadme.toString(), apiToken); - uploadReadme.prettyPrint(); - uploadReadme.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.files[0].label", equalTo("README.md")); - Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, apiToken); downloadFiles2.then().assertThat() .statusCode(OK.getStatusCode()); // The creator can download a restricted file and an unrestricted file from a draft. 
- Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream())); UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken) .then().assertThat().statusCode(OK.getStatusCode()); @@ -268,13 +278,41 @@ public void downloadAllFilesRestricted() throws IOException { UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) .then().assertThat().statusCode(OK.getStatusCode()); - // Now a guest user can download files (published now) + // A guest user can't download the only file because it's restricted. Response downloadFiles3 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles3.prettyPrint(); downloadFiles3.then().assertThat() + .statusCode(FORBIDDEN.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("'/api/v1/access/dataset/%3ApersistentId' you are not authorized to access this object via this api endpoint. Please check your code for typos, or consult our API guide at http://guides.dataverse.org.")); + + // The creator uploads a README that will be public. + Path pathToReadme = Paths.get(Files.createTempDirectory(null) + File.separator + "README.md"); + Files.write(pathToReadme, "My findings.".getBytes()); + + Response uploadReadme = UtilIT.uploadFileViaNative(datasetId.toString(), pathToReadme.toString(), apiToken); + uploadReadme.prettyPrint(); + uploadReadme.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.files[0].label", equalTo("README.md")); + + UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + // Now a guest user can download files and get the public one. 
+ Response downloadFiles4 = UtilIT.downloadFiles(datasetPid, null); + downloadFiles4.then().assertThat() .statusCode(OK.getStatusCode()); // The guest can only get the unrestricted file (and the manifest). - Assert.assertEquals(new HashSet<>(Arrays.asList("README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles3.getBody().asInputStream())); + Assert.assertEquals(new HashSet<>(Arrays.asList("README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles4.getBody().asInputStream())); + + Response downloadFiles5 = UtilIT.downloadFiles(datasetPid, apiToken); + downloadFiles5.then().assertThat() + .statusCode(OK.getStatusCode()); + + // The creator can download both files (and the manifest). + Assert.assertEquals(new HashSet<>(Arrays.asList("secrets.md", "README.md", "MANIFEST.TXT")), gatherFilenames(downloadFiles5.getBody().asInputStream())); } @@ -315,7 +353,8 @@ public void downloadAllFilesTabular() throws IOException { .statusCode(OK.getStatusCode()) .body("data.files[0].label", equalTo("50by1000.dta")); - assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION)); + // UtilIT.MAXIMUM_INGEST_LOCK_DURATION is 3 but not long enough. + assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, 4)); Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken); downloadFiles1.then().assertThat()