feature(API.PID.reconcile): Added command and API to reconcile the PID of an unpublished dataset.
johannes-darms committed May 17, 2024
1 parent 77c7102 commit 2d216ea
Showing 12 changed files with 598 additions and 12 deletions.
5 changes: 5 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DataFile.java
@@ -13,6 +13,7 @@
import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker;
import edu.harvard.iq.dataverse.ingest.IngestReport;
import edu.harvard.iq.dataverse.ingest.IngestRequest;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.FileUtil;
import edu.harvard.iq.dataverse.util.ShapefileHandler;
@@ -1111,4 +1112,8 @@ private boolean tagExists(String tagLabel) {
}
return false;
}
@Override
public PidProvider getEffectivePidGenerator() {
return getOwner().getEffectivePidGenerator();
}
} // end of class
2 changes: 2 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DvObject.java
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.storageuse.StorageQuota;

@@ -493,6 +494,7 @@ public void setStorageQuota(StorageQuota storageQuota) {
*/
public abstract boolean isAncestorOf( DvObject other );

public abstract PidProvider getEffectivePidGenerator();

@OneToMany(mappedBy = "definitionPoint",cascade={ CascadeType.REMOVE, CascadeType.MERGE,CascadeType.PERSIST}, orphanRemoval=true)
List<RoleAssignment> roleAssignments;
@@ -229,6 +229,7 @@ public void setPidGenerator(PidProvider pidGenerator) {
}
}

@Override
public PidProvider getEffectivePidGenerator() {
if (pidGenerator == null) {
String specs = getPidGeneratorSpecs();
@@ -39,7 +39,7 @@ public enum Type {
CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS,
PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED,
GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS,
GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS;
GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS, PIDRECONCILED;

public String getDescription() {
return BundleUtil.getStringFromBundle("notification.typeDescription." + this.name());
37 changes: 36 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -4613,7 +4613,42 @@ public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext cr
return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion));
}, getRequestUser(crc));
}


@PUT
@AuthRequired
@Path("{identifier}/pidReconcile/{pididentifier}")
public Response reconcilePid(@Context ContainerRequestContext crc, @PathParam("identifier") String datasetId,
@PathParam("pididentifier") String generatorId) throws WrappedResponse {

// Superuser-only:
AuthenticatedUser user;
try {
user = getRequestAuthenticatedUserOrDie(crc);
} catch (WrappedResponse ex) {
return error(Response.Status.UNAUTHORIZED, "Authentication is required.");
}
if (!user.isSuperuser()) {
return error(Response.Status.FORBIDDEN, "Superusers only.");
}

Dataset dataset;
PidProvider pidProvider;
try {
dataset = findDatasetOrDie(datasetId);
} catch (WrappedResponse ex) {
return error(Response.Status.NOT_FOUND, "No such dataset");
}
if (PidUtil.getManagedProviderIds().contains(generatorId)) {
pidProvider = PidUtil.getPidProvider(generatorId);
} else {
return error(Response.Status.NOT_FOUND, "No PID Generator found for the given id");
}
return response(req -> {
execCommand(new ReconcileDatasetPidCommand(req, dataset, pidProvider));
return ok(dataset.getGlobalId().toString());
}, getRequestUser(crc));

}
/**
* Get the PidProvider that will be used for generating new DOIs in this dataset
*
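For reference, the new endpoint can be exercised over HTTP once the change is deployed. Below is a minimal client sketch, assuming a local installation at http://localhost:8080, the database id of an unpublished dataset, a superuser API token, and a managed PID provider id of "perma1" (host, ids, and token are placeholders, not part of this commit):

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ReconcilePidClientSketch {
    public static void main(String[] args) throws Exception {
        // Placeholders: adjust host, dataset database id, managed PID provider id, and API token.
        String baseUrl = "http://localhost:8080";
        String datasetId = "42";
        String generatorId = "perma1";
        String apiToken = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; // must belong to a superuser

        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create(baseUrl + "/api/datasets/" + datasetId + "/pidReconcile/" + generatorId))
                .header("X-Dataverse-key", apiToken)
                .PUT(HttpRequest.BodyPublishers.noBody())
                .build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        // On success, the response body contains the dataset's new global id.
        System.out.println(response.statusCode() + ": " + response.body());
    }
}

Matching the checks in reconcilePid() above, an authenticated non-superuser gets 403, an unknown dataset 404, and an unknown generatorId 404 as well.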
@@ -508,6 +508,7 @@ public void displayNotification() {
case RETURNEDDS:
case WORKFLOW_SUCCESS:
case WORKFLOW_FAILURE:
case PIDRECONCILED:
case STATUSUPDATED:
userNotification.setTheObject(datasetVersionService.find(userNotification.getObjectId()));
break;
@@ -1,11 +1,6 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.DatasetVersionDifference;
import edu.harvard.iq.dataverse.DatasetVersionUser;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
@@ -25,8 +20,6 @@
import static java.util.stream.Collectors.joining;

import jakarta.validation.ConstraintViolation;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
import edu.harvard.iq.dataverse.settings.JvmSettings;

/**
@@ -151,9 +144,9 @@ protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws Command
* @param ctxt
* @throws CommandException
*/
protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctxt, boolean retry) throws CommandException {
protected void registerExternalIdentifier(DvObject theDataset, CommandContext ctxt, boolean retry) throws CommandException {
if (!theDataset.isIdentifierRegistered()) {
PidProvider pidProvider = PidUtil.getPidProvider(theDataset.getGlobalId().getProviderId());
PidProvider pidProvider = theDataset.getEffectivePidGenerator();
if ( pidProvider != null ) {
try {
if (pidProvider.alreadyRegistered(theDataset)) {
@@ -0,0 +1,181 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import edu.harvard.iq.dataverse.export.ExportService;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.util.BundleUtil;

import java.io.IOException;
import java.sql.Timestamp;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Given a draft dataset (one with no published versions) and a PID provider that differs from the currently
* configured one, this command switches the dataset to the new provider and reconciles its PID (removes the
* existing PID, registers a new one, and notifies the users about the change).
*
* @author jdarms
*/
// No required permissions because we check for superuser status.
@RequiredPermissions({})
public class ReconcileDatasetPidCommand extends AbstractDatasetCommand<Dataset> {

private static final Logger logger = Logger.getLogger(ReconcileDatasetPidCommand.class.getName());
PidProvider newPidProvider;

public ReconcileDatasetPidCommand(DataverseRequest aRequest, Dataset theDataset, PidProvider newPidProvider) {
super(aRequest, theDataset);
this.newPidProvider = newPidProvider;
}


@Override
public Dataset execute(CommandContext ctxt) throws CommandException {
// ensure that only superuser can execute the command.
if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) {
throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), this,
Collections.singleton(Permission.EditDataset), getDataset());
}
// Dataset must be unreleased! This means there is only one version!
if (getDataset().isReleased()) { //@TODO: Clarify whether this is the best check...
throw new IllegalCommandException("Dataset already published, cannot alter PID Provider", this);
}
// Dataset must not be harvested!
if (getDataset().isHarvested()) {
throw new IllegalCommandException("Dataset is harvested, cannot alter PID Provider", this);
}
PidProvider currentPidProvider = getDataset().getEffectivePidGenerator();
// new PID Provider must be different from the currently configured one!
if (this.newPidProvider.equals(currentPidProvider)) {
throw new IllegalCommandException("PID Provider " + currentPidProvider.getId() + " is the same as the configured one. This operation has no effect!", this);
}

GlobalId oldId = getDataset().getGlobalId();
if (oldId == null) {
throw new IllegalStateException("Dataset without a global identifier, cannot alter!");
}
logger.fine("Reconciling dataset( id =`" + getDataset().getId() + ")` - removing globalId `" + getDataset().getGlobalId() + '`');
// remove dataset PID
try {
if (currentPidProvider.alreadyRegistered(getDataset())) { // if not registered with the PIDProvider then there is no need to delete it...
currentPidProvider.deleteIdentifier(getDataset()); // delete it externally
}
getDataset().setGlobalId(null); // remove it internally
getDataset().setGlobalIdCreateTime(null);
getDataset().setIdentifierRegistered(false);
} catch (Exception e) {
logger.log(Level.WARNING, "Identifier deletion was not successful", e);
}

if (ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner())) {
reconcileFilePids(ctxt, currentPidProvider);
}
getDataset().setPidGenerator(this.newPidProvider);
newPidProvider.generatePid(getDataset()); // this updates Protocol, Authority, and Identifier and thus a new GlobalID
logger.fine("Reconciling dataset( id =`" + getDataset().getId() + ")` - creating new globalId `" + getDataset().getGlobalId() + '`');
if (!newPidProvider.registerWhenPublished()) {
registerExternalIdentifier(getDataset(), ctxt, true); // this updates GlobalIdCreateTime and IdentifierRegistered
}
// keep old Id as alternative identifier
AlternativePersistentIdentifier api = new AlternativePersistentIdentifier();
api.setProtocol(oldId.getProtocol());
api.setAuthority(oldId.getAuthority());
api.setIdentifier(oldId.getIdentifier());
api.setDvObject(getDataset());
api.setStorageLocationDesignator(true);// cf. Dataset#getIdentifierForFileStorage()
if (getDataset().getAlternativePersistentIndentifiers() != null) {
getDataset().getAlternativePersistentIndentifiers().add(api);
} else {
getDataset().setAlternativePersistentIndentifiers(Set.of(api));
}
// We keep the old persistent identifier as an AlternativePersistentIdentifier with storageLocationDesignator true.
// This keeps the link to the object store intact, without altering the files.
// IMHO: This command should also update the storage. First, maintenance of the storage becomes a mess if there is a counterintuitive layout.
// Second, it could occur that another object is minted with the old identifier and consequently we would have a conflict in our storage system.
// We accept this risk for now since a superuser can update the storage manually from the old file path to the new one, and remove the AlternativePersistentIdentifier from the database.
// This removes the old identifier completely from the system and avoids all side effects...
// @TODO: Move files on storage once the storage API support a move operation!
ctxt.em().merge(getDataset());
ctxt.em().flush();

logger.fine("Reconciling dataset( id =`" + getDataset().getId() + ")` - Replaced old globalId `" + oldId + " with new globalId `" + getDataset().getGlobalId() + '`');
// notify all users with direct role assignments about the changed persistent identifier
List<RoleAssignment> ras = ctxt.roles().directRoleAssignments(getDataset());
for (RoleAssignment ra : ras) {
for (AuthenticatedUser au : ctxt.roleAssignees().getExplicitUsers(ctxt.roleAssignees().getRoleAssignee(ra.getAssigneeIdentifier()))) {
ctxt.notifications().sendNotification(au, new Timestamp(new Date().getTime()), UserNotification.Type.PIDRECONCILED, getDataset().getLatestVersion().getId(), "Persistent identifier changed!");
}
}

return getDataset();
}

private void reconcileFilePids(CommandContext ctxt, PidProvider currentPidProvider) {
// remove datafile PIDs
try {
for (DataFile df : getDataset().getFiles()) {
if (currentPidProvider.alreadyRegistered(df)) {
currentPidProvider.deleteIdentifier(df); // delete it externally
}
GlobalId oldPid = df.getGlobalId();
df.setGlobalId(null); // and remove it internally from the data structure
df.setGlobalIdCreateTime(null);
df.setIdentifierRegistered(false);

AlternativePersistentIdentifier api = new AlternativePersistentIdentifier();
api.setProtocol(oldPid.getProtocol());
api.setAuthority(oldPid.getAuthority());
api.setIdentifier(oldPid.getIdentifier());
api.setDvObject(df);
api.setStorageLocationDesignator(true); // cf. Dataset#getIdentifierForFileStorage()
if (df.getAlternativePersistentIndentifiers() != null) {
df.getAlternativePersistentIndentifiers().add(api);
} else {
df.setAlternativePersistentIndentifiers(Set.of(api));
}
// DataFiles are not children of DvObjectContainer, hence we cannot update the PIDProvider for them! cf. DvObjectContainer.setPidGenerator(this.newPidProvider);
// We don't need to update the PIDProvider since the configuration of the Dataset, which is already updated, is used.
// @TODO: Is this true?
newPidProvider.generatePid(df); // this updates Protocol, Authority, and Identifier and thus a new GlobalID
logger.fine("Reconciling datafile( id =`" + df.getId() + ")` - creating new globalId `" +df.getGlobalId() + '`');
if (!newPidProvider.registerWhenPublished()) {
registerExternalIdentifier(df, ctxt, true); // this updates GlobalIdCreateTime and IdentifierRegistered
}
}
} catch (Exception e) {
logger.log(Level.WARNING, "Identifier deletion was not successful", e);
}
}

@Override
public boolean onSuccess(CommandContext ctxt, Object r) {
//update search index with the state
ctxt.index().asyncIndexDataset(getDataset(), true);
//invalidate all existing Export caches
// currently, Exports are only possible for published datasets...
// so there is no need to invalidate caches...
// yet here is the needed snippet
// try {
// ExportService.getInstance().clearAllCachedFormats(getDataset());
// } catch (IOException e) {
// throw new RuntimeException(e);
// }
return true;
}
}
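Server-side, the command follows the usual engine pattern; the API method in Datasets.java above is the only caller added in this commit, but any bean with access to the command engine could submit it the same way. A rough sketch, assuming an injected EjbDataverseEngine and an already-resolved superuser DataverseRequest (the wrapper class and method below are illustrative, not part of this commit):

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.ReconcileDatasetPidCommand;
import edu.harvard.iq.dataverse.pidproviders.PidProvider;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;

// Illustrative wrapper: assumes the caller already holds a superuser request and an unpublished dataset.
public class ReconcilePidInvocationSketch {

    public Dataset reconcile(EjbDataverseEngine commandEngine, DataverseRequest request,
                             Dataset dataset, String generatorId) throws CommandException {
        // Same lookup the API endpoint uses for the {pididentifier} path parameter.
        PidProvider newProvider = PidUtil.getPidProvider(generatorId);
        // The command removes the old PID, mints one with the new provider, keeps the old PID as an
        // AlternativePersistentIdentifier, and notifies users with direct role assignments.
        return commandEngine.submit(new ReconcileDatasetPidCommand(request, dataset, newProvider));
    }
}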
3 changes: 3 additions & 0 deletions src/main/java/propertyFiles/Bundle.properties
@@ -226,6 +226,7 @@ notification.publishFailedPidReg={0} in {1} could not be published due to a fail
notification.workflowFailed=An external workflow run on {0} in {1} has failed. Check your email and/or view the Dataset page which may have additional details. Contact support if this continues to happen.
notification.workflowSucceeded=An external workflow run on {0} in {1} has succeeded. Check your email and/or view the Dataset page which may have additional details.
notification.statusUpdated=The status of dataset {0} has been updated to {1}.
notification.pidreconciled=The persistent identifier of dataset {0} has been updated to `{1}`.
notification.datasetMentioned=Announcement Received: Newly released {0} <a href="{1}">{2}</a> {3} Dataset {4}.

notification.ingestCompleted=Dataset <a href="/dataset.xhtml?persistentId={0}" title="{1}">{1}</a> has one or more tabular files that completed the <a href="{2}/{3}/user/dataset-management.html#tabular-data-files" title="Tabular Data Files - Dataverse User Guide" target="_blank" rel="noopener">tabular ingest process</a> and are available in archival formats.
@@ -285,6 +286,8 @@ notification.typeDescription.WORKFLOW_FAILURE=External workflow run has failed
notification.typeDescription.STATUSUPDATED=Status of dataset has been updated
notification.typeDescription.DATASETCREATED=Dataset was created by user
notification.typeDescription.DATASETMENTIONED=Dataset was referenced in remote system
notification.typeDescription.PIDRECONCILED=Persistent identifier of dataset has been updated


groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned.
user.message.signup.label=Create Account
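The two placeholders in notification.pidreconciled are filled positionally, like the other notification strings: {0} with the dataset link/name and {1} with the new persistent identifier (see the dataverseuser.xhtml fragment below). A quick JDK-only illustration of the substitution, with made-up argument values:

import java.text.MessageFormat;

public class PidReconciledMessageExample {
    public static void main(String[] args) {
        // Pattern copied from Bundle.properties; the argument values are invented for illustration.
        String pattern = "The persistent identifier of dataset {0} has been updated to `{1}`.";
        String message = MessageFormat.format(pattern, "My Draft Dataset", "perma:LOCAL/ABC123");
        System.out.println(message);
        // -> The persistent identifier of dataset My Draft Dataset has been updated to `perma:LOCAL/ABC123`.
    }
}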
10 changes: 10 additions & 0 deletions src/main/webapp/dataverseuser.xhtml
@@ -436,6 +436,16 @@
<f:param value="#{DatasetUtil:getLocaleExternalStatus(item.theObject.externalStatusLabel)}"/>
</h:outputFormat>
</ui:fragment>
<ui:fragment rendered="#{item.type == 'PIDRECONCILED'}">
<o:importFunctions type="edu.harvard.iq.dataverse.dataset.DatasetUtil" />
<span class="icon-dataset text-icon-inline text-muted"></span>
<h:outputFormat value="#{bundle['notification.pidreconciled']}" escape="false">
<o:param>
<a href="/dataset.xhtml?persistentId=#{item.theObject.getDataset().getGlobalId()}&amp;version=DRAFT&amp;faces-redirect=true" title="#{item.theObject.getDataset().getDisplayName()}">#{item.theObject.getDataset().getDisplayName()}</a>
</o:param>
<f:param value="#{item.theObject.getDataset().getGlobalId()}"/>
</h:outputFormat>
</ui:fragment>
<ui:fragment rendered="#{item.type == 'DATASETMENTIONED'}">
<o:importFunctions type="edu.harvard.iq.dataverse.util.json.JsonUtil" />
<span class="icon-dataset text-icon-inline text-muted"></span>