Skip to content

Commit

Permalink
#6524 add api endpoints for file storage/download size
Browse files Browse the repository at this point in the history
at dataset/version level
  • Loading branch information
sekmiller committed Feb 3, 2020
1 parent e7de306 commit 9457fec
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 26 deletions.
60 changes: 44 additions & 16 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -866,45 +866,73 @@ public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) {
}

/**
 * Convenience overload: computes the size of the dataset without counting
 * cached extras (passes {@code false} for that flag).
 *
 * NOTE(review): two consecutive return statements appear here. The first calls
 * the two-argument overload; the second calls the four-argument overload with
 * mode "storage" and a null version. Presumably these are the pre-change and
 * post-change lines of the same diff hunk and only the second should remain
 * (as written, the second return is unreachable) - confirm against the repository.
 *
 * @param dataset the dataset to measure
 * @return total size as reported by the overload that is delegated to
 * @throws IOException propagated from the overload that is delegated to
 */
public long findStorageSize(Dataset dataset) throws IOException {
return findStorageSize(dataset, false);
return findStorageSize(dataset, false, "storage", null);
}


/**
 * Computes the size of the dataset in "storage" mode with no dataset version
 * selected, by delegating to the four-argument overload.
 *
 * @param dataset the dataset to measure
 * @param countCachedExtras passed through unchanged to the four-argument overload
 * @return the value returned by the four-argument overload
 * @throws IOException propagated from the four-argument overload
 */
public long findStorageSize(Dataset dataset, boolean countCachedExtras) throws IOException {
    final String storageMode = "storage";
    return findStorageSize(dataset, countCachedExtras, storageMode, null);
}

/**
* Returns the total byte size of the files in this dataset
*
* @param dataset
* @param countCachedExtras boolean indicating if the cached disposable extras should also be counted
* @param mode String indicating whether the size is computed for storage (all files of the dataset) or for download (only the files of the given version)
* @param version optional param for dataset version
* @return total size
* @throws IOException if it can't access the objects via StorageIO
* (in practice, this can only happen when called with countCachedExtras=true; when run in the
* default mode, the method doesn't need to access the storage system, as the
* sizes of the main files are recorded in the database)
*/
public long findStorageSize(Dataset dataset, boolean countCachedExtras) throws IOException {
public long findStorageSize(Dataset dataset, boolean countCachedExtras, String mode, DatasetVersion version) throws IOException {
long total = 0L;

if (dataset.isHarvested()) {
return 0L;
}

for (DataFile datafile : dataset.getFiles()) {
total += datafile.getFilesize();
System.out.print("mode: " + mode);
System.out.print("dataset: " + dataset);
System.out.print("version: " + version);
List<DataFile> filesToTalley = new ArrayList();

if (version == null || (mode != null && mode.equals("storage"))){
filesToTalley = dataset.getFiles();
} else {
List <FileMetadata> fmds = version.getFileMetadatas();
System.out.print("FileMetadata: " + fmds.size());
for (FileMetadata fmd : fmds){
filesToTalley.add(fmd.getDataFile());
}

if (!countCachedExtras) {
if (datafile.isTabularData()) {
// count the size of the stored original, in addition to the main tab-delimited file:
Long originalFileSize = datafile.getDataTable().getOriginalFileSize();
if (originalFileSize != null) {
total += originalFileSize;
}

System.out.print("filesToTalley: " + filesToTalley.size());

//CACHED EXTRAS FOR DOWNLOAD?


for (DataFile datafile : filesToTalley) {
total += datafile.getFilesize();

if (!countCachedExtras) {
if (datafile.isTabularData()) {
// count the size of the stored original, in addition to the main tab-delimited file:
Long originalFileSize = datafile.getDataTable().getOriginalFileSize();
if (originalFileSize != null) {
total += originalFileSize;
}
}
} else {
StorageIO<DataFile> storageIO = datafile.getStorageIO();
for (String cachedFileTag : storageIO.listAuxObjects()) {
total += storageIO.getAuxObjectSize(cachedFileTag);
}
}
} else {
StorageIO<DataFile> storageIO = datafile.getStorageIO();
for (String cachedFileTag : storageIO.listAuxObjects()) {
total += storageIO.getAuxObjectSize(cachedFileTag);
}
}
}

// and finally,
if (countCachedExtras) {
Expand Down
16 changes: 13 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -1802,10 +1802,20 @@ public Response getMakeDataCountMetricCurrentMonth(@PathParam("id") String idSup

/**
 * GET {identifier}/storagesize - reports the storage size of a dataset.
 *
 * Looks the dataset up with findDatasetOrDie, runs a GetDatasetStorageSizeCommand
 * in mode "storage" with a null version, and formats the result with the
 * "datasets.api.datasize" bundle string.
 *
 * NOTE(review): two signature lines appear below (with and without the
 * UriInfo/HttpHeaders context parameters); presumably the first is the
 * pre-change line of the diff - confirm. The body shown here does not use
 * uriInfo or headers.
 *
 * @param dvIdtf dataset identifier taken from the path
 * @param includeCached query parameter forwarded to the command as its cached-files flag
 * @return the response built from the formatted size message
 * @throws WrappedResponse declared by the signature
 */
@GET
@Path("{identifier}/storagesize")
public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached) throws WrappedResponse {

public Response getStorageSize(@PathParam("identifier") String dvIdtf, @QueryParam("includeCached") boolean includeCached,
@Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse {

return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize"),
execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached, "storage", null)))));
}

/**
 * GET {identifier}/versions/{versionId}/downloadsize - reports the download
 * size of one version of a dataset.
 *
 * Runs a GetDatasetStorageSizeCommand in mode "download" with the cached-files
 * flag set to false and the version resolved by getDatasetVersionOrDie, then
 * formats the result with the "datasets.api.datasize" bundle string.
 *
 * @param dvIdtf dataset identifier taken from the path
 * @param version version identifier taken from the path
 * @param uriInfo request context forwarded to getDatasetVersionOrDie
 * @param headers request headers forwarded to getDatasetVersionOrDie
 * @return the response built from the formatted size message
 * @throws WrappedResponse declared by the signature
 */
@GET
@Path("{identifier}/versions/{versionId}/downloadsize")
public Response getDownloadSize(@PathParam("identifier") String dvIdtf, @PathParam("versionId") String version,
@Context UriInfo uriInfo, @Context HttpHeaders headers) throws WrappedResponse {

return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize"),
// NOTE(review): the next line references includeCached, which is not a parameter of this
// method; presumably it is the pre-change line of the diff and should not be kept - confirm.
execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), includeCached)))));
// NOTE(review): findDatasetOrDie(dvIdtf) is evaluated twice in this expression (once for the
// command target, once for the version lookup); consider resolving the dataset once.
execCommand(new GetDatasetStorageSizeCommand(req, findDatasetOrDie(dvIdtf), false, "download", getDatasetVersionOrDie(req, version , findDatasetOrDie(dvIdtf), uriInfo, headers))))));
}

@GET
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,19 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;

/**
Expand All @@ -26,36 +32,48 @@ public class GetDatasetStorageSizeCommand extends AbstractCommand<Long> {

private final Dataset dataset;
private final Boolean countCachedFiles;
private final String mode;
private final DatasetVersion version;

/**
 * Creates a command with the default settings: cached files are not counted,
 * the mode is "download" and no dataset version is selected.
 *
 * NOTE(review): getRequiredPermissions in this class requires
 * ManageDatasetPermissions only when the mode is "storage", so commands built
 * with this constructor require no permission - confirm that is intended.
 *
 * @param aRequest the request on whose behalf the command runs
 * @param target the dataset to measure
 */
public GetDatasetStorageSizeCommand(DataverseRequest aRequest, Dataset target) {
    this(aRequest, target, false, "download", null);
}

/**
 * Creates a command with explicit settings; every argument is stored in the
 * field of the same name (target is stored as dataset).
 *
 * NOTE(review): two signature lines appear below. The body assigns mode and
 * version, which only the five-parameter signature declares, so presumably
 * the three-parameter line is the pre-change line of the diff - confirm.
 *
 * @param aRequest the request on whose behalf the command runs
 * @param target the dataset to measure
 * @param countCachedFiles flag later passed to findStorageSize by execute
 * @param mode mode string later passed to findStorageSize by execute; "storage" is the value getRequiredPermissions tests for
 * @param version dataset version later passed to findStorageSize by execute; may be null
 */
public GetDatasetStorageSizeCommand(DataverseRequest aRequest, Dataset target, boolean countCachedFiles) {
public GetDatasetStorageSizeCommand(DataverseRequest aRequest, Dataset target, boolean countCachedFiles, String mode, DatasetVersion version) {
super(aRequest, target);
dataset = target;
this.countCachedFiles = countCachedFiles;
this.mode = mode;
this.version = version;
}

/**
 * Computes the size of the target dataset by delegating to
 * {@code ctxt.datasets().findStorageSize} with this command's cached-files
 * flag, mode and version.
 *
 * @param ctxt command context used to reach the dataset service
 * @return the size reported by findStorageSize
 * @throws CommandException if the dataset is null, or if findStorageSize
 *         fails with an IOException
 */
@Override
public Long execute(CommandContext ctxt) throws CommandException {
    // Guard first: the log statement below dereferences dataset, so checking
    // after it (as before) could never report the null case.
    if (dataset == null) {
        // should never happen - must indicate some data corruption in the database
        throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.listing.error"), this);
    }

    logger.fine("getDataverseStorageSize called on " + dataset.getDisplayName());

    try {
        return ctxt.datasets().findStorageSize(dataset, countCachedFiles, mode, version);
    } catch (IOException ex) {
        throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.datasize.ioerror"), this);
    }
}

/**
 * Declares the permissions this command needs, keyed by the empty string.
 * In "storage" mode the command requires ManageDatasetPermissions; in any
 * other mode (including a null mode) it requires no permission.
 *
 * @return a single-entry map from "" to the required permission set
 */
@Override
public Map<String, Set<Permission>> getRequiredPermissions() {
    final Set<Permission> required;
    if ("storage".equals(mode)) {
        required = Collections.singleton(Permission.ManageDatasetPermissions);
    } else {
        required = Collections.<Permission>emptySet();
    }
    return Collections.singletonMap("", required);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Long execute(CommandContext ctxt) throws CommandException {
}

try {
total += ctxt.datasets().findStorageSize(dataset, countCachedFiles);
total += ctxt.datasets().findStorageSize(dataset, countCachedFiles, "storage", null);
} catch (IOException ex) {
throw new CommandException(BundleUtil.getStringFromBundle("dataverse.datasize.ioerror"), this);
}
Expand Down
16 changes: 15 additions & 1 deletion src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -1321,6 +1321,15 @@ public void testDataSizeInDataverse() throws InterruptedException {
// wait for it to ingest...
assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile , UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));
// sleep(10000);

Response publishDataversetResp = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken);
publishDataversetResp.then().assertThat()
.statusCode(OK.getStatusCode());

Response publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
//msg(publishDatasetResp.body().asString());
publishDatasetResp.then().assertThat()
.statusCode(OK.getStatusCode());

// This is the magic number - the number of bytes in the 2 files uploaded
// above, plus the size of the tab-delimited file generated by the ingest
Expand All @@ -1336,11 +1345,16 @@ public void testDataSizeInDataverse() throws InterruptedException {

magicControlString = MessageFormat.format(BundleUtil.getStringFromBundle("datasets.api.datasize"), magicSizeNumber);

Response datasetStorageSizeResponse = UtilIT.findDatasetStorageSize(datasetId.toString());
Response datasetStorageSizeResponse = UtilIT.findDatasetStorageSize(datasetId.toString(), apiToken);
datasetStorageSizeResponse.prettyPrint();

assertEquals(magicControlString, JsonPath.from(datasetStorageSizeResponse.body().asString()).getString("data.message"));

Response datasetDownloadSizeResponse = UtilIT.findDatasetDownloadSize(datasetId.toString());
datasetDownloadSizeResponse.prettyPrint();

assertEquals(magicControlString, JsonPath.from(datasetDownloadSizeResponse.body().asString()).getString("data.message"));

}

@Test
Expand Down
9 changes: 8 additions & 1 deletion src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -2304,8 +2304,15 @@ static Response findDataverseStorageSize(String dataverseId, String apiToken) {
* @param apiToken
* @return response
*/
// NOTE(review): two signature lines appear below. The body uses apiToken, which only the
// two-parameter signature declares, so presumably the one-parameter line is the pre-change
// line of the diff - confirm.
static Response findDatasetStorageSize(String datasetId) {
static Response findDatasetStorageSize(String datasetId, String apiToken) {
return given()
// send the caller's API token with the request
.header(API_TOKEN_HTTP_HEADER, apiToken)
// GET the storage size endpoint of the given dataset
.get("/api/datasets/" + datasetId + "/storagesize");
}

/**
 * Requests the download size of the ":latest" version of a dataset.
 * No API token header is added to the request.
 *
 * @param datasetId identifier placed in the request path
 * @return response
 */
static Response findDatasetDownloadSize(String datasetId) {
    String downloadSizePath = "/api/datasets/" + datasetId + "/versions/:latest/downloadsize";
    return given().get(downloadSizePath);
}

}

0 comments on commit 9457fec

Please sign in to comment.