From de7a914487a592c3ad50fb420e447490c5859218 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 18:18:26 -0400 Subject: [PATCH 01/15] DRS Archiver and documentation, required pom updates --- .../source/installation/config.rst | 31 +- pom.xml | 18 +- .../impl/DRSSubmitToArchiveCommand.java | 367 ++++++++++++++++++ 3 files changed, 411 insertions(+), 5 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 7b171837c2e..5cf97261c6c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1067,7 +1067,7 @@ Your Dataverse installation may be configured to submit a copy of published Data The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. -At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchive, and S3SubmitToArchiveCommand , which all extend the AbstractSubmitToArchiveCommand and using the configurable mechanisms discussed below. +At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchiveCommand, and S3SubmitToArchiveCommand, which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. A DRSSubmitToArchiveCommand, which works with Harvard's DRS, also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (and with collection-specific parameters). All current options support the archival status APIs and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). @@ -1171,7 +1171,7 @@ The S3 Archiver defines two custom settings, a required :S3ArchiverConfig and op The credentials for your S3 account can be stored in a profile in a standard credentials file (e.g. ~/.aws/credentials) referenced via the :S3ArchiverProfile setting (which will default to the default entry), or can be supplied via MicroProfile settings as described for S3 stores (dataverse.s3archiver.access-key and dataverse.s3archiver.secret-key). -The :S3ArchiverConfig setting is a json object that must include an "s3_bucket_name" and may include additional S3-related parameters as described for S3 Stores, including "connection-pool-size","custom-endpoint-url", "custom-endpoint-region", "path-style-access", "payload-signing", and "chunked-encoding". +The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_name" and may include additional S3-related parameters as described for S3 Stores, including "connection-pool-size", "custom-endpoint-url", "custom-endpoint-region", "path-style-access", "payload-signing", and "chunked-encoding". \:S3ArchiverConfig - minimally includes the name of the bucket to use.
For example: @@ -1181,6 +1181,29 @@ The :S3ArchiverConfig setting is a json object that must include an "s3_bucket_n ``curl http://localhost:8080/api/admin/settings/:S3ArchiverProfile -X PUT -d "archiver"`` +.. _Harvard DRS Archiver Configuration: + +Harvard DRS Configuration ++++++++++++++++++++++++++ + +The Harvard DRS Archiver can send Dataverse Archival Bag to the Harvard DRS. It extends the S3 Archiver and uses all of the settings of that Archiver. + +As this Archiver is specific to Harvard and the DRS, a full description of the required configuration is out-of-scope for this guide. However, the basics will be described to support management and to indicate how similar future Archivers might leverage its flexible configuration. + +This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing several keys and sub-objects: +- "DRSendpoint":"https://somewhere.org/drsingest" - the URI for the DRS Ingest Management Service (DIMS) +- "trust_cert":true - whether to trust a self-signed cert from the DIMS +- "single_version":true - whether to limit Dataverse to archiving one version of a dataset +- "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided +- "admin_metadata" - a sub-object containing many DRS-specific keys and + - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. + +``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` + +``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DRSArchiverConfig, :S3ArchiverConfig, :S3ArchiverProfile, :BagGeneratorThreads"`` + +The :DRSArchiverConfig is required as is the :S3ArchiverConfig setting. The :S3ArchiverProfile setting is optional and the DRSArchiver can also use the :BagGeneratorThreads setting as described in the DuraCloud Configuration section above. + .. _Archiving API Call: API Call @@ -2677,6 +2700,10 @@ These are the bucket and project names to be used with the GoogleCloudSubmitToAr These are the json configuration object and S3 profile settings to be used with the S3SubmitToArchiveCommand class. Further information is in the :ref:`S3 Archiver Configuration` section above. +:DRSArchiverConfig +++++++++++++++++++ + +This is the json configuration object required by the DRSSubmitToArchiveCommand class. Further information is in the :ref:`DRS Archiver Configuration` section above. .. 
_:InstallationName: diff --git a/pom.xml b/pom.xml index ce9f1c4b63d..1c4e53eeb3d 100644 --- a/pom.xml +++ b/pom.xml @@ -52,7 +52,7 @@ --> - + @@ -357,7 +357,7 @@ commons-codec commons-codec - 1.9 + 1.15 @@ -516,7 +516,19 @@ google-cloud-storage - + + + + com.auth0 + java-jwt + 3.19.1 + + + + io.github.erdtman + java-json-canonicalization + 1.1 + diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java new file mode 100644 index 00000000000..be6bcbb0db2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -0,0 +1,367 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.SettingsWrapper; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import edu.harvard.iq.dataverse.workflow.step.Failure; +import edu.harvard.iq.dataverse.workflow.step.WorkflowStepResult; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.security.KeyFactory; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.interfaces.RSAPrivateKey; +import java.security.spec.InvalidKeySpecException; +import java.security.spec.PKCS8EncodedKeySpec; +import java.time.Instant; +import java.util.Base64; +import java.util.Date; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.logging.Logger; + +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; +import javax.json.JsonValue; +import javax.net.ssl.SSLContext; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.ssl.SSLContextBuilder; + +import org.erdtman.jcs.JsonCanonicalizer; + +import com.auth0.jwt.JWT; +import com.auth0.jwt.algorithms.Algorithm; +import com.auth0.jwt.exceptions.JWTCreationException; + +@RequiredPermissions(Permission.PublishDataset) +public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { + + private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); + private static final String DRS_CONFIG = "This archiver adds"; + + private static final String ADMIN_METADATA = "admin_metadata"; + private static final String S3_BUCKET_NAME = "s3_bucket_name"; + private static final String S3_PATH = 
"s3_path"; + private static final String COLLECTIONS = "collections"; + private static final String PACKAGE_ID = "package_id"; + private static final String SINGLE_VERSION = "single_version"; + private static final String DRS_ENDPOINT = "DRSendpoint"; + + + private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; + + private static final String TRUST_CERT = "trust_cert"; + private static final String TIMEOUT = "timeout"; + + public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion version) { + super(aRequest, version); + } + + @Override + public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, + Map requestedSettings) { + logger.info("In DRSSubmitToArchiveCommand..."); + JsonObject drsConfigObject = null; + + try { + drsConfigObject = JsonUtil.getJsonObject(requestedSettings.get(DRS_CONFIG)); + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + Set collections = adminMetadata.getJsonObject(COLLECTIONS).keySet(); + Dataset dataset = dv.getDataset(); + Dataverse ancestor = dataset.getOwner(); + String alias = getArchivableAncestor(ancestor, collections); + String spaceName = getSpaceName(dataset); + String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); + + if (alias != null) { + if (drsConfigObject.getBoolean(SINGLE_VERSION, false)) { + for (DatasetVersion version : dataset.getVersions()) { + if (version.getArchivalCopyLocation() != null) { + return new Failure("DRS Archiver fail: version " + version.getFriendlyVersionNumber() + + " already archived."); + } + } + } + + JsonObject collectionConfig = adminMetadata.getJsonObject(COLLECTIONS).getJsonObject(alias); + + WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); + + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.STATUS, DatasetVersion.FAILURE); + statusObject.add(DatasetVersion.MESSAGE, "Bag not transferred"); + + if (s3Result == WorkflowStepResult.OK) { + //This will be overwritten if the further steps are successful + statusObject.add(DatasetVersion.STATUS, DatasetVersion.FAILURE); + statusObject.add(DatasetVersion.MESSAGE, "Bag transferred, DRS ingest call failed"); + + // Now contact DRS + boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); + int jwtTimeout = drsConfigObject.getInt(TIMEOUT, 5); + JsonObjectBuilder job = Json.createObjectBuilder(); + + job.add(S3_BUCKET_NAME, bucketName); + + job.add(PACKAGE_ID, packageId); + job.add(S3_PATH, spaceName); + + // We start with the default admin_metadata + JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); + // Remove collections and then override any params for the given alias + amob.remove(COLLECTIONS); + + for (Entry entry : collectionConfig.entrySet()) { + amob.add(entry.getKey(), entry.getValue()); + } + job.add(ADMIN_METADATA, amob); + + String drsConfigString = JsonUtil.prettyPrint(job.build()); + + // TODO - ADD code to ignore self-signed cert + CloseableHttpClient client = null; + if (trustCert) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + try { + SSLContext sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()) + .build(); + client = HttpClients.custom().setSSLContext(sslContext) + .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).build(); + } catch 
(KeyManagementException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (KeyStoreException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + if (client == null) { + client = HttpClients.createDefault(); + } + HttpPost ingestPost; + try { + ingestPost = new HttpPost(); + ingestPost.setURI(new URI(drsConfigObject.getString(DRS_ENDPOINT))); + + byte[] encoded = Base64.getDecoder().decode(System.getProperty(RSA_KEY).replaceAll("[\\r\\n]", "")); + + KeyFactory keyFactory = KeyFactory.getInstance("RSA"); + PKCS8EncodedKeySpec keySpec = new PKCS8EncodedKeySpec(encoded); + RSAPrivateKey privKey = (RSAPrivateKey) keyFactory.generatePrivate(keySpec); + //RSAPublicKey publicKey; + /* + * If public key is needed: encoded = Base64.decodeBase64(publicKeyPEM); + * + * KeyFactory keyFactory = KeyFactory.getInstance("RS256"); X509EncodedKeySpec + * keySpec = new X509EncodedKeySpec(encoded); return (RSAPublicKey) + * keyFactory.generatePublic(keySpec); RSAPublicKey publicKey = new + * RSAPublicKey(System.getProperty(RS256_KEY)); + */ + Algorithm algorithmRSA = Algorithm.RSA256(null, privKey); + + String body = drsConfigString; + String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, jwtTimeout); + logger.info("JWT: " + jwtString); + + ingestPost.setHeader("Authorization", "Bearer " + jwtString); + + logger.info("Body: " + body); + ingestPost.setEntity(new StringEntity(body, "utf-8")); + ingestPost.setHeader("Content-Type", "application/json"); + + try (CloseableHttpResponse response = client.execute(ingestPost)) { + int code = response.getStatusLine().getStatusCode(); + String responseBody = new String(response.getEntity().getContent().readAllBytes(), + StandardCharsets.UTF_8); + if (code == 202) { + logger.info("Status: " + code); + logger.info("Response" + responseBody); + JsonObject responseObject = JsonUtil.getJsonObject(responseBody); + if (responseObject.containsKey(DatasetVersion.STATUS) + && responseObject.containsKey(DatasetVersion.MESSAGE)) { + String status = responseObject.getString(DatasetVersion.STATUS); + if (status.equals(DatasetVersion.PENDING) || status.equals(DatasetVersion.FAILURE) + || status.equals(DatasetVersion.SUCCESS)) { + statusObject.addAll(Json.createObjectBuilder(responseObject)); + switch (status) { + case DatasetVersion.PENDING: + logger.info("DRS Ingest successfully started for: " + packageId + " : " + + responseObject.toString()); + break; + case DatasetVersion.FAILURE: + logger.severe("DRS Ingest Failed for: " + packageId + " : " + + responseObject.toString()); + return new Failure("DRS Archiver fail in Ingest call"); + case DatasetVersion.SUCCESS: + // We don't expect this from DRS + logger.warning("Unexpected Status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with returned status: " + + status); + return new Failure( + "DRS Archiver fail in Ingest call with returned status: " + status); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + + " - response does not include status and message"); + return new Failure( + "DRS Archiver fail in Ingest call \" - response does not include status and message"); + } + } else { + logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); + logger.info("Status: " + code); + logger.info("Response" + responseBody); + return new Failure("DRS Archiver fail in 
Ingest call with status code: " + code); + } + } catch (ClientProtocolException e2) { + e2.printStackTrace(); + } catch (IOException e2) { + e2.printStackTrace(); + } + } catch (URISyntaxException e) { + return new Failure( + "DRS Archiver workflow step failed: unable to parse " + DRS_ENDPOINT ); + } catch (JWTCreationException exception) { + // Invalid Signing configuration / Couldn't convert Claims. + return new Failure( + "DRS Archiver JWT Creation failure: " + exception.getMessage() ); + + } + // execute + catch (InvalidKeySpecException e) { + e.printStackTrace(); + } catch (NoSuchAlgorithmException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } finally { + //Set status after success or failure + dv.setArchivalCopyLocation(statusObject.build().toString()); + } + } else { + logger.warning("DRS: S3 archiving failed - will not call ingest: " + packageId); + dv.setArchivalCopyLocation(statusObject.build().toString()); + return new Failure("DRS Archiver fail in initial S3 Archiver transfer"); + } + + } else { + logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); + return WorkflowStepResult.OK; + } + } else { + logger.warning(DRS_CONFIG + " not found"); + return new Failure("DRS Submission not configured - no " + DRS_CONFIG + " found."); + } + return WorkflowStepResult.OK; + } + + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { + String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); + logger.fine("Canonical body: " + canonicalBody); + String digest = DigestUtils.sha256Hex(canonicalBody); + return JWT.create().withIssuer(BrandingUtil.getInstallationBrandName()).withIssuedAt(Date.from(Instant.now())) + .withExpiresAt(Date.from(Instant.now().plusSeconds(60 * expirationInMinutes))) + .withKeyId("defaultDataverse").withClaim("bodySHA256Hash", digest).sign(algorithmRSA); + } + + private static String getArchivableAncestor(Dataverse ancestor, Set collections) { + String alias = ancestor.getAlias(); + while (ancestor != null && !collections.contains(alias)) { + ancestor = ancestor.getOwner(); + if (ancestor != null) { + alias = ancestor.getAlias(); + } else { + alias = null; + } + } + return alias; + } + + //Overrides inherited method to also check whether the dataset is in a collection for which the DRS Archiver is configured + public static boolean isArchivable(Dataset d, SettingsWrapper sw) { + JsonObject drsConfigObject = null; + + try { + String config = sw.get(DRS_CONFIG, null); + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + JsonObject adminMetadata = drsConfigObject.getJsonObject(ADMIN_METADATA); + if (adminMetadata != null) { + JsonObject collectionObj = adminMetadata.getJsonObject(COLLECTIONS); + if (collectionObj != null) { + Set collections = collectionObj.keySet(); + return getArchivableAncestor(d.getOwner(), collections) != null; + } + } + } + return false; + } + + // DRS Archiver supports single-version semantics if the SINGLE_VERSION key in + // the DRS_CONFIG is true + // These methods make that choices visible on the page (cached via + // SettingsWrapper) or in the API (using SettingServiceBean), both using the + // same underlying logic + + public static boolean isSingleVersion(SettingsWrapper sw) { + String config 
= sw.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + + public static boolean isSingleVersion(SettingsServiceBean ss) { + String config = ss.get(DRS_CONFIG, null); + return isSingleVersion(config); + } + + private static boolean isSingleVersion(String config) { + JsonObject drsConfigObject = null; + try { + if (config != null) { + drsConfigObject = JsonUtil.getJsonObject(config); + } + } catch (Exception e) { + logger.warning("Unable to parse " + DRS_CONFIG + " setting as a Json object"); + } + if (drsConfigObject != null) { + return drsConfigObject.getBoolean(SINGLE_VERSION, false); + } + return false; + } +} From cf265ad03e925fb0a31f61873991e81e4a58395f Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 18:37:43 -0400 Subject: [PATCH 02/15] try 3 space indent --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 5cf97261c6c..593635c0c22 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1196,7 +1196,7 @@ This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing - "single_version":true - whether to limit Dataverse to archiving one version of a dataset - "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided - "admin_metadata" - a sub-object containing many DRS-specific keys and - - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. + - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. 
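To make the shape of that setting concrete, the following is a sketch of a complete :DRSArchiverConfig value interpolated from the key descriptions above (the endpoint, the admin_metadata keys "billing_code" and "contact", and the collection aliases are illustrative placeholders, not values taken from this patch)::

   curl http://localhost:8080/api/admin/settings/:DRSArchiverConfig -X PUT -d '{
      "DRSendpoint": "https://dims.example.edu/ingest",
      "trust_cert": true,
      "single_version": true,
      "timeout": 600,
      "admin_metadata": {
         "billing_code": "DEFAULT-CODE",
         "contact": "archivist@example.edu",
         "collections": {
            "collectionA": {},
            "collectionB": {"billing_code": "COLLECTION-B-CODE"}
         }
      }
   }'

Under this sketch, datasets in collectionA (and its subcollections) would be archived with the default admin_metadata, while collectionB would override only its billing code, matching the override behavior described for the "collections" sub-object.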
``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` From a81517f26b84d0460291592ceb2c7542a0ca7612 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 26 May 2022 18:40:29 -0400 Subject: [PATCH 03/15] alternate sublist char --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 593635c0c22..8afecbf2c93 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1196,7 +1196,7 @@ This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing - "single_version":true - whether to limit Dataverse to archiving one version of a dataset - "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided - "admin_metadata" - a sub-object containing many DRS-specific keys and - - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. + * "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` From 42ddd6945778aa01b16c8575087ee4d55522cde2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 2 Jun 2022 08:45:19 -0400 Subject: [PATCH 04/15] blank line before list --- doc/sphinx-guides/source/installation/config.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 8afecbf2c93..536ed8cb98e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1191,12 +1191,13 @@ The Harvard DRS Archiver can send Dataverse Archival Bag to the Harvard DRS. It As this Archiver is specific to Harvard and the DRS, a full description of the required configuration is out-of-scope for this guide. However, the basics will be described to support management and to indicate how similar future Archivers might leverage its flexible configuration. 
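Stepping back to the code for a moment: the createJWTString method added in the first patch canonicalizes the JSON request body, hashes it, and signs a short-lived token carrying that hash. As a sketch of the other half of that exchange, a receiving service such as the DIMS could verify the token roughly as follows, using the java-jwt and java-json-canonicalization libraries this patch series adds to the pom. This assumes the verifier holds the RSA public key matching the archiver's private key; the class and parameter names here are illustrative, not part of the patch.

import java.security.interfaces.RSAPublicKey;

import org.apache.commons.codec.digest.DigestUtils;
import org.erdtman.jcs.JsonCanonicalizer;

import com.auth0.jwt.JWT;
import com.auth0.jwt.algorithms.Algorithm;
import com.auth0.jwt.interfaces.DecodedJWT;

public class DimsVerifierSketch {

    // Mirror image of createJWTString: accept the request only if the signature and
    // issuer check out (expiry is enforced automatically via the exp claim) and the
    // bodySHA256Hash claim matches a fresh digest of the canonicalized request body.
    public static boolean verify(String jwtString, String requestBody,
            RSAPublicKey publicKey, String expectedIssuer) {
        try {
            DecodedJWT jwt = JWT.require(Algorithm.RSA256(publicKey, null))
                    .withIssuer(expectedIssuer)
                    .build()
                    .verify(jwtString);
            String canonicalBody = new JsonCanonicalizer(requestBody).getEncodedString();
            return DigestUtils.sha256Hex(canonicalBody)
                    .equals(jwt.getClaim("bodySHA256Hash").asString());
        } catch (Exception e) {
            // Bad signature, wrong issuer, expired token, or a body that cannot be canonicalized
            return false;
        }
    }
}

Binding the token to a hash of the canonicalized body means the signature survives whitespace and key-ordering differences introduced by intermediaries while still detecting any substantive tampering with the payload.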
This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing several keys and sub-objects: + - "DRSendpoint":"https://somewhere.org/drsingest" - the URI for the DRS Ingest Management Service (DIMS) - "trust_cert":true - whether to trust a self-signed cert from the DIMS - "single_version":true - whether to limit Dataverse to archiving one version of a dataset - "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided - "admin_metadata" - a sub-object containing many DRS-specific keys and - * "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. + - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` From 3d39a5eb78ee7b0a5c0a92f72e42e2f51cab8550 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 2 Jun 2022 08:48:11 -0400 Subject: [PATCH 05/15] fix reference --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 536ed8cb98e..ec8cd26b299 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2704,7 +2704,7 @@ These are the json configuration object and S3 profile settings to be used with :DRSArchiverConfig ++++++++++++++++++ -This is the json configuration object required by the DRSSubmitToArchiveCommand class. Further information is in the :ref:`DRS Archiver Configuration` section above. +This is the json configuration object required by the DRSSubmitToArchiveCommand class. Further information is in the :ref:`Harvard DRS Archiver Configuration` section above. .. 
_:InstallationName: From a793d1be66b5f435ca614155c2a91d9cefc988c0 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Sun, 26 Jun 2022 11:01:37 -0400 Subject: [PATCH 06/15] Updates from HDC3 branch --- .../impl/DRSSubmitToArchiveCommand.java | 28 +++++++++++++++---- .../impl/S3SubmitToArchiveCommand.java | 16 ++++++++--- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index be6bcbb0db2..f72201394f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -61,8 +61,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implements Command { private static final Logger logger = Logger.getLogger(DRSSubmitToArchiveCommand.class.getName()); - private static final String DRS_CONFIG = "This archiver adds"; - + private static final String DRS_CONFIG = ":DRSArchivalConfig"; private static final String ADMIN_METADATA = "admin_metadata"; private static final String S3_BUCKET_NAME = "s3_bucket_name"; private static final String S3_PATH = "s3_path"; @@ -99,7 +98,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t Dataverse ancestor = dataset.getOwner(); String alias = getArchivableAncestor(ancestor, collections); String spaceName = getSpaceName(dataset); - String packageId = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String packageId = getFileName(spaceName, dv); if (alias != null) { if (drsConfigObject.getBoolean(SINGLE_VERSION, false)) { @@ -129,7 +128,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t int jwtTimeout = drsConfigObject.getInt(TIMEOUT, 5); JsonObjectBuilder job = Json.createObjectBuilder(); - job.add(S3_BUCKET_NAME, bucketName); + job.add(S3_BUCKET_NAME, adminMetadata.getString(S3_BUCKET_NAME)); job.add(PACKAGE_ID, packageId); job.add(S3_PATH, spaceName); @@ -138,9 +137,15 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t JsonObjectBuilder amob = Json.createObjectBuilder(adminMetadata); // Remove collections and then override any params for the given alias amob.remove(COLLECTIONS); - + // Allow override of bucket name + if (collectionConfig.containsKey(S3_BUCKET_NAME)) { + job.add(S3_BUCKET_NAME, collectionConfig.get(S3_BUCKET_NAME)); + } + for (Entry entry : collectionConfig.entrySet()) { - amob.add(entry.getKey(), entry.getValue()); + if (!entry.getKey().equals(S3_BUCKET_NAME)) { + amob.add(entry.getKey(), entry.getValue()); + } } job.add(ADMIN_METADATA, amob); @@ -287,6 +292,17 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return WorkflowStepResult.OK; } + @Override + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + (".v" + dv.getFriendlyVersionNumber()).replace('.', '_'); + } + + @Override + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + ("_datacite.v" + dv.getFriendlyVersionNumber()).replace('.','_'); + } + + public static String createJWTString(Algorithm algorithmRSA, String installationBrandName, String body, int expirationInMinutes) throws IOException { String canonicalBody = new JsonCanonicalizer(body).getEncodedString(); logger.fine("Canonical body: " + canonicalBody); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index 5d5e7fd2e13..e4afde8dffb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -69,7 +69,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } catch (Exception e) { logger.warning("Unable to parse " + S3_CONFIG + " setting as a Json object"); } - if (configObject != null && bucketName != null) { + if (configObject != null && profileName != null && bucketName != null) { s3 = createClient(configObject, profileName); tm = TransferManagerBuilder.standard().withS3Client(s3).build(); @@ -84,8 +84,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // Add datacite.xml file ObjectMetadata om = new ObjectMetadata(); om.setContentLength(dataciteIn.available()); - String dcKey = spaceName + "/" + spaceName + "_datacite.v" + dv.getFriendlyVersionNumber() - + ".xml"; + String dcKey = spaceName + "/" + getDataCiteFileName(spaceName, dv) + ".xml"; tm.upload(new PutObjectRequest(bucketName, dcKey, dataciteIn, om)).waitForCompletion(); om = s3.getObjectMetadata(bucketName, dcKey); if (om == null) { @@ -94,7 +93,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } // Store BagIt file - String fileName = spaceName + ".v" + dv.getFriendlyVersionNumber(); + String fileName = getFileName(spaceName, dv); + String bagKey = spaceName + "/" + fileName + ".zip"; // Add BagIt ZIP file // Google uses MD5 as one way to verify the @@ -157,6 +157,14 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } + protected String getDataCiteFileName(String spaceName, DatasetVersion dv) { + return spaceName + "_datacite.v" + dv.getFriendlyVersionNumber(); + } + + protected String getFileName(String spaceName, DatasetVersion dv) { + return spaceName + ".v" + dv.getFriendlyVersionNumber(); + } + protected String getSpaceName(Dataset dataset) { if (spaceName == null) { spaceName = dataset.getGlobalId().asString().replace(':', '-').replace('/', '-').replace('.', '-') From 59a39f3b60827b821eb94232fc3685ba303ecb1c Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 21 Jul 2022 19:51:49 -0400 Subject: [PATCH 07/15] update constant names --- .../impl/DRSSubmitToArchiveCommand.java | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index f72201394f4..f421ade082d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -115,13 +115,13 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t WorkflowStepResult s3Result = super.performArchiveSubmission(dv, token, requestedSettings); JsonObjectBuilder statusObject = Json.createObjectBuilder(); - statusObject.add(DatasetVersion.STATUS, DatasetVersion.FAILURE); - statusObject.add(DatasetVersion.MESSAGE, "Bag not transferred"); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + 
statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); if (s3Result == WorkflowStepResult.OK) { //This will be overwritten if the further steps are successful - statusObject.add(DatasetVersion.STATUS, DatasetVersion.FAILURE); - statusObject.add(DatasetVersion.MESSAGE, "Bag transferred, DRS ingest call failed"); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag transferred, DRS ingest call failed"); // Now contact DRS boolean trustCert = drsConfigObject.getBoolean(TRUST_CERT, false); @@ -213,22 +213,22 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t logger.info("Status: " + code); logger.info("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); - if (responseObject.containsKey(DatasetVersion.STATUS) - && responseObject.containsKey(DatasetVersion.MESSAGE)) { - String status = responseObject.getString(DatasetVersion.STATUS); - if (status.equals(DatasetVersion.PENDING) || status.equals(DatasetVersion.FAILURE) - || status.equals(DatasetVersion.SUCCESS)) { + if (responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS) + && responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS_MESSAGE)) { + String status = responseObject.getString(DatasetVersion.ARCHIVAL_STATUS); + if (status.equals(DatasetVersion.ARCHIVAL_STATUS_PENDING) || status.equals(DatasetVersion.ARCHIVAL_STATUS_FAILURE) + || status.equals(DatasetVersion.ARCHIVAL_STATUS_SUCCESS)) { statusObject.addAll(Json.createObjectBuilder(responseObject)); switch (status) { - case DatasetVersion.PENDING: + case DatasetVersion.ARCHIVAL_STATUS_PENDING: logger.info("DRS Ingest successfully started for: " + packageId + " : " + responseObject.toString()); break; - case DatasetVersion.FAILURE: + case DatasetVersion.ARCHIVAL_STATUS_FAILURE: logger.severe("DRS Ingest Failed for: " + packageId + " : " + responseObject.toString()); return new Failure("DRS Archiver fail in Ingest call"); - case DatasetVersion.SUCCESS: + case DatasetVersion.ARCHIVAL_STATUS_SUCCESS: // We don't expect this from DRS logger.warning("Unexpected Status: " + status); } From d142868e5630f36969277c94e2d75187e100fdd9 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 25 Jul 2022 09:55:29 -0400 Subject: [PATCH 08/15] fix merge issue --- doc/sphinx-guides/source/api/native-api.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 04bcfcb12cf..cc3e5609db0 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1910,13 +1910,9 @@ The body is a JSON object that must contain a "status" which may be "success", " export VERSION=1.0 export JSON='{"status":"failure","message":"Something went wrong"}' -<<<<<<< HEAD - curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" -X PUT "$SERVER_URL/api/datasets/submitDatasetVersionToArchive/$VERSION/status?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON" - -Note that if the configured archiver only supports archiving a single version, the call may return 409 CONFLICT if/when another version already has a non-null status. 
-======= curl -H "X-Dataverse-key: $API_TOKEN" -H "Content-Type:application/json" -X PUT "$SERVER_URL/api/datasets/:persistentId/$VERSION/archivalStatus?persistentId=$PERSISTENT_IDENTIFIER" -d "$JSON" ->>>>>>> refs/remotes/IQSS/develop + +Note that if the configured archiver only supports archiving a single version, the call may return 409 CONFLICT if/when another version already has a non-null status. Delete the Archival Status of a Dataset By Version ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 7dc92e74e3c69deb034148d9ef1f4945966301ab Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 25 Jul 2022 16:14:19 -0400 Subject: [PATCH 09/15] Apply suggestions from code review Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/installation/config.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index ada2a1801ea..acb0937427c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1188,9 +1188,9 @@ The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_n Harvard DRS Configuration +++++++++++++++++++++++++ -The Harvard DRS Archiver can send Dataverse Archival Bag to the Harvard DRS. It extends the S3 Archiver and uses all of the settings of that Archiver. +The Harvard Digital Repository Service (DRS) Archiver can send Dataverse Archival Bags to the Harvard DRS. It extends the S3 Archiver and uses all of the settings of that Archiver. -As this Archiver is specific to Harvard and the DRS, a full description of the required configuration is out-of-scope for this guide. However, the basics will be described to support management and to indicate how similar future Archivers might leverage its flexible configuration. +As this Archiver is specific to Harvard and the DRS, a full description of the required configuration is out-of-scope for this guide. However, the basics will be described to support its setup and to indicate how similar future Archivers might leverage its flexible configuration. In particular, the DRS Archive supports single-version-only semantics and supports archiving only from specified collections (and with collection specific parameters). This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing several keys and sub-objects: @@ -1199,7 +1199,7 @@ This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing - "single_version":true - whether to limit Dataverse to archiving one version of a dataset - "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided - "admin_metadata" - a sub-object containing many DRS-specific keys and - - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. + - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. 
If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. ``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` From 47a7eb32950808a9eb7a69dcc814cbc0daee6b35 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Mon, 25 Jul 2022 16:15:46 -0400 Subject: [PATCH 10/15] param name change per review --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index f421ade082d..7665bbc729c 100--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -68,7 +68,7 @@ public class DRSSubmitToArchiveCommand extends S3SubmitToArchiveCommand implemen private static final String COLLECTIONS = "collections"; private static final String PACKAGE_ID = "package_id"; private static final String SINGLE_VERSION = "single_version"; - private static final String DRS_ENDPOINT = "DRSendpoint"; + private static final String DRS_ENDPOINT = "DRS_endpoint"; private static final String RSA_KEY = "dataverse.archiver.drs.rsa_key"; From e03c3623ff8ff0576322566d19e12d5576b1cc86 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 28 Jul 2022 17:43:13 -0400 Subject: [PATCH 11/15] remove DRS Archiver info from guide --- .../source/installation/config.rst | 30 +------------------ 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index acb0937427c..56536c9a73a 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1069,7 +1069,7 @@ Your Dataverse installation may be configured to submit a copy of published Data The Dataverse Software offers an internal archive workflow which may be configured as a PostPublication workflow via an admin API call to manually submit previously published Datasets and prior versions to a configured archive such as Chronopolis. The workflow creates a `JSON-LD `_ serialized `OAI-ORE `_ map file, which is also available as a metadata export format in the Dataverse Software web interface. -At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchiveCommand, and S3SubmitToArchiveCommand, which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. A DRSSubmitToArchiveCommand, which works with Harvard's DRS, also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (and with collection-specific parameters).
+At present, archiving classes include the DuraCloudSubmitToArchiveCommand, LocalSubmitToArchiveCommand, GoogleCloudSubmitToArchiveCommand, and S3SubmitToArchiveCommand, which all extend the AbstractSubmitToArchiveCommand and use the configurable mechanisms discussed below. (A DRSSubmitToArchiveCommand, which works with Harvard's DRS, also exists and, while specific to DRS, is a useful example of how Archivers can support single-version-only semantics and support archiving only from specified collections (with collection-specific parameters)). All current options support the archival status APIs and the same status is available in the dataset page version table (for contributors/those who could view the unpublished dataset, with more detail available to superusers). @@ -1183,29 +1183,6 @@ The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_n ``curl http://localhost:8080/api/admin/settings/:S3ArchiverProfile -X PUT -d "archiver"`` -.. _Harvard DRS Archiver Configuration: - -Harvard DRS Configuration -+++++++++++++++++++++++++ - -The Harvard Digital Repository Service (DRS) Archiver can send Dataverse Archival Bags to the Harvard DRS. It extends the S3 Archiver and uses all of the settings of that Archiver. - -As this Archiver is specific to Harvard and the DRS, a full description of the required configuration is out-of-scope for this guide. However, the basics will be described to support its setup and to indicate how similar future Archivers might leverage its flexible configuration. In particular, the DRS Archive supports single-version-only semantics and supports archiving only from specified collections (and with collection specific parameters). - -This Archiver adds a :DRSArchiverConfig setting that is a JSON object containing several keys and sub-objects: - -- "DRSendpoint":"https://somewhere.org/drsingest" - the URI for the DRS Ingest Management Service (DIMS) -- "trust_cert":true - whether to trust a self-signed cert from the DIMS -- "single_version":true - whether to limit Dataverse to archiving one version of a dataset -- "timeout":600 - DRS uses JWT for authentication and this key sets the timeout (in seconds) of the token provided -- "admin_metadata" - a sub-object containing many DRS-specific keys and - - "collections" - a sub-object containing keys that identify specific collections in Dataverse by their alias. If there is an alias entry for a given collection, a) the DRS Archiver will submit any Dataverse within that collection or its subcollection for archiving, and b) will use any keys in the object supplied for that alias as overrides for the admin_metadata provided in the parent object. The latter allows, for example, different billing codes and contacts to be assigned for different collections. - -``curl http://localhost:8080/api/admin/settings/:ArchiverClassName -X PUT -d "edu.harvard.iq.dataverse.engine.command.impl.DRSSubmitToArchiveCommand"`` - -``curl http://localhost:8080/api/admin/settings/:ArchiverSettings -X PUT -d ":DRSArchiverConfig, :S3ArchiverConfig, :S3ArchiverProfile, :BagGeneratorThreads"`` - -The :DRSArchiverConfig is required as is the :S3ArchiverConfig setting. The :S3ArchiverProfile setting is optional and the DRSArchiver can also use the :BagGeneratorThreads setting as described in the DuraCloud Configuration section above. ..
_Archiving API Call: @@ -2703,11 +2680,6 @@ These are the bucket and project names to be used with the GoogleCloudSubmitToAr These are the json configuration object and S3 profile settings to be used with the S3SubmitToArchiveCommand class. Further information is in the :ref:`S3 Archiver Configuration` section above. -:DRSArchiverConfig -++++++++++++++++++ - -This is the json configuration object required by the DRSSubmitToArchiveCommand class. Further information is in the :ref:`Harvard DRS Archiver Configuration` section above. - .. _:InstallationName: :InstallationName From 062d9823f826176fc84daa52c42492a81d4bb730 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 28 Jul 2022 17:45:47 -0400 Subject: [PATCH 12/15] info -> fine --- .../command/impl/DRSSubmitToArchiveCommand.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 7665bbc729c..0f8e176c064 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -83,7 +83,7 @@ public DRSSubmitToArchiveCommand(DataverseRequest aRequest, DatasetVersion versi @Override public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken token, Map requestedSettings) { - logger.info("In DRSSubmitToArchiveCommand..."); + logger.fine("In DRSSubmitToArchiveCommand..."); JsonObject drsConfigObject = null; try { @@ -197,11 +197,11 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String body = drsConfigString; String jwtString = createJWTString(algorithmRSA, BrandingUtil.getInstallationBrandName(), body, jwtTimeout); - logger.info("JWT: " + jwtString); + logger.fine("JWT: " + jwtString); ingestPost.setHeader("Authorization", "Bearer " + jwtString); - logger.info("Body: " + body); + logger.fine("Body: " + body); ingestPost.setEntity(new StringEntity(body, "utf-8")); ingestPost.setHeader("Content-Type", "application/json"); @@ -210,8 +210,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String responseBody = new String(response.getEntity().getContent().readAllBytes(), StandardCharsets.UTF_8); if (code == 202) { - logger.info("Status: " + code); - logger.info("Response" + responseBody); + logger.fine("Status: " + code); + logger.fine("Response" + responseBody); JsonObject responseObject = JsonUtil.getJsonObject(responseBody); if (responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS) && responseObject.containsKey(DatasetVersion.ARCHIVAL_STATUS_MESSAGE)) { @@ -246,8 +246,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - logger.info("Status: " + code); - logger.info("Response" + responseBody); + logger.fine("Status: " + code); + logger.fine("Response" + responseBody); return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } } catch (ClientProtocolException e2) { @@ -282,7 +282,7 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } else { - logger.info("DRS Archiver: No matching collection found - will not archive: " + packageId); + logger.fine("DRS Archiver: No matching collection found - will not archive: " + packageId); 
return WorkflowStepResult.OK; } } else { From a4cd742a0407fe9e8466f3788d81718d47391812 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Thu, 28 Jul 2022 17:54:54 -0400 Subject: [PATCH 13/15] remove redundant/obsolete comment/log stmt --- .../engine/command/impl/DRSSubmitToArchiveCommand.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java index 0f8e176c064..ecea6f1dcb2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DRSSubmitToArchiveCommand.java @@ -151,7 +151,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t String drsConfigString = JsonUtil.prettyPrint(job.build()); - // TODO - ADD code to ignore self-signed cert CloseableHttpClient client = null; if (trustCert) { // use the TrustSelfSignedStrategy to allow Self Signed Certificates @@ -246,7 +245,6 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t } } else { logger.severe("DRS Ingest Failed for: " + packageId + " with status code: " + code); - logger.fine("Status: " + code); logger.fine("Response" + responseBody); return new Failure("DRS Archiver fail in Ingest call with status code: " + code); } From c6ca334a2ce5139ce585f666939ba437c020eb29 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 15:42:11 -0400 Subject: [PATCH 14/15] single quotes in curl --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index bbf240b388a..716d58cd21d 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1191,7 +1191,6 @@ The :S3ArchiverConfig setting is a JSON object that must include an "s3_bucket_n \:S3ArchiverConfig - example to also set the name of an S3 profile to use. For example: - ``curl http://localhost:8080/api/admin/settings/:S3ArchiverConfig -X PUT -d "{"s3_bucket_name":"archival-bucket", "profile":"archiver"}`` .. _Archiving API Call: @@ -2688,6 +2687,7 @@ These are the bucket and project names to be used with the GoogleCloudSubmitToAr This is the JSON configuration object setting to be used with the S3SubmitToArchiveCommand class. Further information is in the :ref:`S3 Archiver Configuration` section above. + .. 
_:InstallationName: :InstallationName From f326a022c351f77ca8c2dd4c17e26c8d51e05bca Mon Sep 17 00:00:00 2001 From: qqmyers Date: Tue, 9 Aug 2022 16:07:38 -0400 Subject: [PATCH 15/15] restore archival status support --- .../command/impl/S3SubmitToArchiveCommand.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java index ca779e48106..f24d956e9d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/S3SubmitToArchiveCommand.java @@ -20,7 +20,9 @@ import java.util.Map; import java.util.logging.Logger; +import javax.json.Json; import javax.json.JsonObject; +import javax.json.JsonObjectBuilder; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -71,6 +73,12 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t s3 = createClient(configObject); tm = TransferManagerBuilder.standard().withS3Client(s3).build(); + + //Set a failure status that will be updated if we succeed + JsonObjectBuilder statusObject = Json.createObjectBuilder(); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_FAILURE); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, "Bag not transferred"); + try { Dataset dataset = dv.getDataset(); @@ -129,7 +137,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t // view it as an admin) // Unsigned URL - gives location but not access without creds - dv.setArchivalCopyLocation(s3.getUrl(bucketName, bagKey).toString()); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS, DatasetVersion.ARCHIVAL_STATUS_SUCCESS); + statusObject.add(DatasetVersion.ARCHIVAL_STATUS_MESSAGE, s3.getUrl(bucketName, bagKey).toString()); } else { logger.warning("Could not write local Bag file " + fileName); return new Failure("S3 Archiver fail writing temp local bag"); @@ -147,6 +156,8 @@ public WorkflowStepResult performArchiveSubmission(DatasetVersion dv, ApiToken t return new Failure("S3 Archiver Submission Failure", e.getLocalizedMessage() + ": check log for details"); + } finally { + dv.setArchivalCopyLocation(statusObject.build().toString()); } return WorkflowStepResult.OK; } else {
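For reference, the archival status these commands record (and that the archivalStatus API documented earlier returns) is a small JSON object with a "status" of "pending", "success", or "failure" plus a "message". A sketch of the three shapes as this code produces them; the pending and success messages are illustrative placeholders, while "Bag not transferred" is the literal failure message set above:

   {"status":"pending","message":"DRS ingest in progress"}
   {"status":"success","message":"https://archival-bucket.s3.example.org/doi-10-5072-fk2-abc123/doi-10-5072-fk2-abc123.v1.0.zip"}
   {"status":"failure","message":"Bag not transferred"}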