From 61abac1352f88a501bb0274d9689b0b3e5c8c017 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 1 Nov 2023 16:42:15 -0400
Subject: [PATCH 01/35] #9464 create json

---
 .../harvard/iq/dataverse/DataversePage.java   |   2 +
 .../iq/dataverse/DataverseServiceBean.java    | 211 ++++++++++++++++++
 2 files changed, 213 insertions(+)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
index 943a74327d5..12b7e41b3d8 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
@@ -597,6 +597,8 @@ public void updateOptionsRadio(Long mdbId, Long dsftId) {
 
     public String save() {
+        String test = dataverseService.getCollectionDatasetSchema(dataverse.getId());
+        System.out.print(test);
         List<DataverseFieldTypeInputLevel> listDFTIL = new ArrayList<>();
         if (editMode != null && ( editMode.equals(EditMode.INFO) || editMode.equals(EditMode.CREATE))) {
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 549b8310122..070fc20a5da 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -42,6 +42,7 @@
 import jakarta.persistence.NonUniqueResultException;
 import jakarta.persistence.PersistenceContext;
 import jakarta.persistence.TypedQuery;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.SolrServerException;
 
 /**
  *
@@ -80,6 +81,9 @@ public class DataverseServiceBean implements java.io.Serializable {
     @EJB
     PermissionServiceBean permissionService;
 
+    @EJB
+    DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService;
+
     @EJB
     SystemConfig systemConfig;
 
@@ -919,5 +923,212 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) {
         return em.createNativeQuery(cqString).getResultList();
     }
+
+    public String getCollectionDatasetSchema(Long dataverseId) {
+
+        List<MetadataBlock> selectedBlocks = new ArrayList<>();
+        List<DatasetFieldType> requiredDSFT = new ArrayList<>();
+
+        Dataverse testDV = this.find(dataverseId);
+
+        while (!testDV.isMetadataBlockRoot()) {
+            if (testDV.getOwner() == null) {
+                break; // we are at the root, which by definition is the metadata block root, regardless of the value
+            }
+            testDV = testDV.getOwner();
+        }
+
+        selectedBlocks.addAll(testDV.getMetadataBlocks());
+
+        for (MetadataBlock mdb : selectedBlocks) {
+            for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) {
+                if (!dsft.isChild()) {
+                    DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId());
+                    if (dsfIl != null) {
+                        dsft.setRequiredDV(dsfIl.isRequired());
+                        dsft.setInclude(dsfIl.isInclude());
+                    } else {
+                        dsft.setRequiredDV(dsft.isRequired());
+                        dsft.setInclude(true);
+                    }
+                    if (dsft.isHasChildren()) {
+                        for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) {
+                            DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId());
+                            if (dsfIlChild != null) {
+                                child.setRequiredDV(dsfIlChild.isRequired());
+                                child.setInclude(dsfIlChild.isInclude());
+                            } else {
+                                // in the case of conditionally required (child = true, parent = false)
+                                // we set this to false; i.e. this is the default "don't override" value
+                                child.setRequiredDV(child.isRequired() && dsft.isRequired());
+                                child.setInclude(true);
+                            }
+                        }
+                    }
+                    if (dsft.isRequiredDV()) {
+                        requiredDSFT.add(dsft);
+                    }
+                }
+            }
+
+        }
+
+        String reqMDBNames = "";
+        List<MetadataBlock> hasReqFields = new ArrayList<>();
+        String retval = datasetSchemaPreface;
+        for (MetadataBlock mdb : selectedBlocks) {
+            for (DatasetFieldType dsft : requiredDSFT) {
+                if (dsft.getMetadataBlock().equals(mdb)) {
+                    hasReqFields.add(mdb);
+                    if (!reqMDBNames.isEmpty()) reqMDBNames += ",";
+                    reqMDBNames += "\"" + mdb.getName() + "\"";
+                    break;
+                }
+            }
+        }
+
+        for (MetadataBlock mdb : hasReqFields) {
+            retval += getCustomMDBSchema(mdb, requiredDSFT);
+        }
+
+        retval += "\n}\n";
+
+        retval += endOfjson.replace("blockNames", reqMDBNames);
+
+        return retval;
+
+    }
+
+    private String datasetSchemaPreface =
+        "{\n" +
+        "  \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" +
+        "  \"$defs\": {\n" +
+        "    \"field\": {\n" +
+        "      \"type\": \"object\",\n" +
+        "      \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" +
+        "      \"properties\": {\n" +
+        "        \"value\": {\n" +
+        "          \"anyOf\": [\n" +
+        "            {\n" +
+        "              \"type\": \"array\"\n" +
+        "            },\n" +
+        "            {\n" +
+        "              \"type\": \"string\"\n" +
+        "            },\n" +
+        "            {\n" +
+        "              \"$ref\": \"#/$defs/field\"\n" +
+        "            }\n" +
+        "          ]\n" +
+        "        },\n" +
+        "        \"typeClass\": {\n" +
+        "          \"type\": \"string\"\n" +
+        "        }\n," +
+        "        \"multiple\": {\n" +
+        "          \"type\": \"boolean\"\n" +
+        "        },\n" +
+        "        \"typeName\": {\n" +
+        "          \"type\": \"string\"\n" +
+        "        }\n" +
+        "      }\n" +
+        "    }\n" +
+        "},\n" +
+        "\"type\": \"object\",\n" +
+        "\"properties\": {\n" +
+        "  \"datasetVersion\": {\n" +
+        "    \"type\": \"object\",\n" +
+        "    \"properties\": {\n" +
+        "      \"license\": {\n" +
+        "        \"type\": \"object\",\n" +
+        "        \"properties\": {\n" +
+        "          \"name\": {\n" +
+        "            \"type\": \"string\"\n" +
+        "          },\n" +
+        "          \"uri\": {\n" +
+        "            \"type\": \"string\",\n" +
+        "            \"format\": \"uri\"\n" +
+        "          }\n" +
+        "        },\n" +
+        "        \"required\": [\"name\", \"uri\"]\n" +
+        "      },\n" +
+        "      \"metadataBlocks\": {\n" +
+        "        \"type\": \"object\",\n" +
+        "        \"properties\": {\n" +
+        "" ;
+
+
+
+    private String reqValTemplate = "        {\n" +
+"          \"contains\": {\n" +
+"            \"properties\": {\n" +
+"              \"typeName\": {\n" +
+"                \"const\": \"reqFieldTypeName\"\n" +
+"              }\n" +
+"            }\n" +
+"          }\n" +
+"        },";
+
+    private String minItemsTemplate = "\n        \"minItems\": numMinItems,\n" +
+"        \"allOf\": [\n";
+    private String endOfReqVal = "        ]\n" +
+"      }\n" +
+"    },\n" +
+"    \"required\": [\"fields\"]\n" +
+"  },";
+
+    private String endOfjson = ",\n" +
+"        \"required\": [blockNames]\n" +
+"      }\n" +
+"    },\n" +
+"    \"required\": [\"license\", \"metadataBlocks\"]\n" +
+"  }\n" +
+"},\n" +
+"\"required\": [\"datasetVersion\"]\n" +
+"}\n";
+
+    private String startOfMDB = "\"blockName\": {\n" +
+"    \"type\": \"object\",\n" +
+"    \"properties\": {\n" +
+"      \"fields\": {\n" +
+"        \"type\": \"array\",\n" +
+"        \"items\": {\n" +
+"          \"$ref\": \"#/$defs/field\"\n" +
+"        },";
+
+
+    private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){
+        String retval = "";
+        boolean mdbHasReqField = false;
+        int numReq = 0;
+        List<DatasetFieldType> requiredThisMDB = new ArrayList<>();
+
+        for (DatasetFieldType dsft : requiredDSFT ){
+
+            if(dsft.getMetadataBlock().equals(mdb)){
+                numReq++;
+                mdbHasReqField = true;
+                requiredThisMDB.add(dsft);
+            }
+        }
+        if (mdbHasReqField){
+            retval += startOfMDB.replace("blockName", mdb.getName());
+
+            retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size()));
+            int count = 0;
+            for (DatasetFieldType dsft:requiredThisMDB ){
+                count++;
+                String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName());
+                if (count < requiredThisMDB.size()){
"\n"; + } else { + reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1); + retval += reqValImp+ "\n"; + retval += endOfReqVal; + } + } + + } + + return retval; + } + } From 38f09f6e401d069956b56df94eb03437ca905a0e Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 2 Nov 2023 09:32:20 -0400 Subject: [PATCH 02/35] #9464 fix json schema formatting --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 070fc20a5da..be2a97541c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -991,7 +991,7 @@ public String getCollectionDatasetSchema(Long dataverseId) { retval += getCustomMDBSchema(mdb, requiredDSFT); } - retval += "\n}\n"; + retval += "\n }"; retval += endOfjson.replace("blockNames", reqMDBNames); @@ -1022,7 +1022,7 @@ public String getCollectionDatasetSchema(Long dataverseId) { " },\n" + " \"typeClass\": {\n" + " \"type\": \"string\"\n" + - " }\n," + + " },\n" + " \"multiple\": {\n" + " \"type\": \"boolean\"\n" + " },\n" + @@ -1085,7 +1085,8 @@ public String getCollectionDatasetSchema(Long dataverseId) { " \"required\": [\"datasetVersion\"]\n" + "}\n"; - private String startOfMDB = "\"blockName\": {\n" + + private String startOfMDB = "" + +" \"blockName\": {\n" + " \"type\": \"object\",\n" + " \"properties\": {\n" + " \"fields\": {\n" + From 5ca4cc08b92b0fe0c4f44bcd3c8da2e32f5ebb47 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 2 Nov 2023 09:48:51 -0400 Subject: [PATCH 03/35] #9464 remove license from required --- .../java/edu/harvard/iq/dataverse/DataverseServiceBean.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index be2a97541c6..5942d4a8010 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1079,7 +1079,7 @@ public String getCollectionDatasetSchema(Long dataverseId) { " \"required\": [blockNames]\n" + " }\n" + " },\n" + -" \"required\": [\"license\", \"metadataBlocks\"]\n" + +" \"required\": [\"metadataBlocks\"]\n" + " }\n" + " },\n" + " \"required\": [\"datasetVersion\"]\n" + From 02a570aceb89afa0b9f110f76137959d5704b2e4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 8 Nov 2023 11:34:53 -0500 Subject: [PATCH 04/35] #9464 Add commands, endpoints, IT, etc --- .../harvard/iq/dataverse/DataversePage.java | 2 - .../iq/dataverse/DataverseServiceBean.java | 124 ++++++++++-------- .../harvard/iq/dataverse/api/Dataverses.java | 35 +++++ .../command/impl/GetDatasetSchemaCommand.java | 44 +++++++ .../impl/ValidateDatasetJsonCommand.java | 45 +++++++ .../harvard/iq/dataverse/api/DatasetsIT.java | 41 ++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 21 +++ 7 files changed, 258 insertions(+), 54 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 
index 12b7e41b3d8..943a74327d5 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java
@@ -597,8 +597,6 @@ public void updateOptionsRadio(Long mdbId, Long dsftId) {
 
     public String save() {
-        String test = dataverseService.getCollectionDatasetSchema(dataverse.getId());
-        System.out.print(test);
         List<DataverseFieldTypeInputLevel> listDFTIL = new ArrayList<>();
         if (editMode != null && ( editMode.equals(EditMode.INFO) || editMode.equals(EditMode.CREATE))) {
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 5942d4a8010..407cfb343c4 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -44,6 +44,11 @@
 import jakarta.persistence.TypedQuery;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.everit.json.schema.Schema;
+import org.everit.json.schema.ValidationException;
+import org.everit.json.schema.loader.SchemaLoader;
+import org.json.JSONObject;
+import org.json.JSONTokener;
 
 /**
  *
@@ -924,12 +929,12 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) {
 
     }
 
-    public String getCollectionDatasetSchema(Long dataverseId) {
+    public String getCollectionDatasetSchema(String dataverseAlias) {
 
         List<MetadataBlock> selectedBlocks = new ArrayList<>();
         List<DatasetFieldType> requiredDSFT = new ArrayList<>();
 
-        Dataverse testDV = this.find(dataverseId);
+        Dataverse testDV = this.findByAlias(dataverseAlias);
 
         while (!testDV.isMetadataBlockRoot()) {
             if (testDV.getOwner() == null) {
@@ -997,7 +1002,60 @@ public String getCollectionDatasetSchema(Long dataverseId) {
 
         return retval;
 
-    }
+    }
+
+    private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){
+        String retval = "";
+        boolean mdbHasReqField = false;
+        int numReq = 0;
+        List<DatasetFieldType> requiredThisMDB = new ArrayList<>();
+
+        for (DatasetFieldType dsft : requiredDSFT ){
+
+            if(dsft.getMetadataBlock().equals(mdb)){
+                numReq++;
+                mdbHasReqField = true;
+                requiredThisMDB.add(dsft);
+            }
+        }
+        if (mdbHasReqField){
+            retval += startOfMDB.replace("blockName", mdb.getName());
+
+            retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size()));
+            int count = 0;
+            for (DatasetFieldType dsft:requiredThisMDB ){
+                count++;
+                String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName());
+                if (count < requiredThisMDB.size()){
+                    retval += reqValImp + "\n";
+                } else {
+                    reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1);
+                    retval += reqValImp+ "\n";
+                    retval += endOfReqVal;
+                }
+            }
+
+        }
+
+        return retval;
+    }
+
+    public String isDatasetJsonValid(String dataverseAlias, String jsonInput) {
+        JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias)));
+
+        try {
+            Schema schema = SchemaLoader.load(rawSchema);
+            schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid
+        } catch (ValidationException vx) {
+            logger.info("Dataset schema error : " + vx); //without classLoader it blows up in actual deployment
+            return "Dataset schema error : " + vx.getErrorMessage();
+        } catch (Exception ex) {
+            logger.info("Dataset file error : " + ex.getLocalizedMessage());
+            return "Dataset file error : " + ex.getLocalizedMessage();
+        }
+
+        return "The Dataset json provided is valid for this Dataverse Collection.";
+    }
 
     private String datasetSchemaPreface =
         "{\n" +
@@ -1053,9 +1111,17 @@ public String getCollectionDatasetSchema(Long dataverseId) {
         "      \"metadataBlocks\": {\n" +
         "        \"type\": \"object\",\n" +
         "        \"properties\": {\n" +
-        "" ;
-
-
+        "" ;
+
+    private String startOfMDB = "" +
+"  \"blockName\": {\n" +
+"    \"type\": \"object\",\n" +
+"    \"properties\": {\n" +
+"      \"fields\": {\n" +
+"        \"type\": \"array\",\n" +
+"        \"items\": {\n" +
+"          \"$ref\": \"#/$defs/field\"\n" +
+"        },";
 
     private String reqValTemplate = "        {\n" +
 "          \"contains\": {\n" +
@@ -1085,51 +1151,5 @@ public String getCollectionDatasetSchema(Long dataverseId) {
 "\"required\": [\"datasetVersion\"]\n" +
 "}\n";
 
-    private String startOfMDB = "" +
-"  \"blockName\": {\n" +
-"    \"type\": \"object\",\n" +
-"    \"properties\": {\n" +
-"      \"fields\": {\n" +
-"        \"type\": \"array\",\n" +
-"        \"items\": {\n" +
-"          \"$ref\": \"#/$defs/field\"\n" +
-"        },";
-
-
-    private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){
-        String retval = "";
-        boolean mdbHasReqField = false;
-        int numReq = 0;
-        List<DatasetFieldType> requiredThisMDB = new ArrayList<>();
-
-        for (DatasetFieldType dsft : requiredDSFT ){
-
-            if(dsft.getMetadataBlock().equals(mdb)){
-                numReq++;
-                mdbHasReqField = true;
-                requiredThisMDB.add(dsft);
-            }
-        }
-        if (mdbHasReqField){
-            retval += startOfMDB.replace("blockName", mdb.getName());
-
-            retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size()));
-            int count = 0;
-            for (DatasetFieldType dsft:requiredThisMDB ){
-                count++;
-                String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName());
-                if (count < requiredThisMDB.size()){
-                    retval += reqValImp + "\n";
-                } else {
-                    reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1);
-                    retval += reqValImp+ "\n";
-                    retval += endOfReqVal;
-                }
-            }
-
-        }
-
-        return retval;
-    }
-
 }

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
index d0711aefa5f..fabb33e328a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
@@ -44,6 +44,7 @@
 import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand;
@@ -68,6 +69,7 @@
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand;
 import edu.harvard.iq.dataverse.settings.JvmSettings;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.BundleUtil;
@@ -232,6 +234,39 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body,
         }
     }
 
+
+    @POST
+    @AuthRequired
+    @Path("{identifier}/validateDatasetJson")
+    @Consumes("application/json")
+    public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) {
+        User u = getRequestUser(crc);
+        try {
+            String validationMessage = execCommand(new ValidateDatasetJsonCommand(createDataverseRequest(u), findDataverseOrDie(idtf), body));
+            return ok(validationMessage);
+        } catch (WrappedResponse ex) {
+            Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex);
+            return ex.getResponse();
+        }
+    }
+
+    @GET
+    @AuthRequired
+    @Path("{identifier}/datasetSchema")
+    @Produces(MediaType.APPLICATION_JSON)
+    public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) {
+        User u = getRequestUser(crc);
+
+        try {
+            String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf)));
+            return ok(datasetSchema);
+        } catch (WrappedResponse ex) {
+            Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex);
+            return ex.getResponse();
+        }
+    }
+
+
     @POST
     @AuthRequired
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
new file mode 100644
index 00000000000..48f135dba32
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
@@ -0,0 +1,44 @@
+
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+
+
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author stephenkraffmiller
+ */
+@RequiredPermissions(Permission.AddDataset)
+public class GetDatasetSchemaCommand extends AbstractCommand<String> {
+
+    private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName());
+
+    private final Dataverse dataverse;
+
+    public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) {
+        super(aRequest, target);
+        dataverse = target;
+    }
+
+    @Override
+    public String execute(CommandContext ctxt) throws CommandException {
+        try {
+            return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias());
+
+        } catch (Exception ex) {
+            String error = "Exception caught in getting the schema for this collection. Error: " + ex;
Error: " + ex; + throw new IllegalCommandException(error, this); + } + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java new file mode 100644 index 00000000000..a01aee1db9e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -0,0 +1,45 @@ + +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class ValidateDatasetJsonCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + private final String datasetJson; + + public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, String datasetJsonIn) { + super(aRequest, target); + dataverse = target; + datasetJson = datasetJsonIn; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + try { + String retVal = ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson); + return retVal; + } catch (Exception ex) { + String error = "Exception caught in getting the schema for this collection. 
Error: " + ex; + return error; + } + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 56bf53c1c99..14131c0fa57 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -162,6 +162,47 @@ public static void afterClass() { .statusCode(200); */ } + + @Test + public void testCollectionSchema(){ + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + String expectedJson = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch1.json"); + + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, expectedJson, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + String pathToJsonFile = "scripts/search/tests/data/datasetMissingReqFields.json"; + + String jsonIn = UtilIT.getDatasetJson(pathToJsonFile); + + Response validateBadDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, jsonIn, apiToken); + validateBadDatasetJsonResponse.prettyPrint(); + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(containsString("Dataset schema error")); + + } @Test public void testCreateDataset() { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index e3a7fd0cfc3..0443bd2e35e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -406,6 +406,27 @@ static Response getGuestbookResponses(String dataverseAlias, Long guestbookId, S } return requestSpec.get("/api/dataverses/" + dataverseAlias + "/guestbookResponses/"); } + + static Response getCollectionSchema (String dataverseAlias, String apiToken){ + + Response getCollectionSchemaResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/dataverses/" + dataverseAlias + "/datasetSchema"); + return getCollectionSchemaResponse; + + } + + static Response validateDatasetJson (String dataverseAlias, String datasetJson, String apiToken){ + + Response getValidateDatasetJsonResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(datasetJson) + .contentType("application/json") + .post("/api/dataverses/" + dataverseAlias + "/validateDatasetJson"); + return getValidateDatasetJsonResponse; + + } static Response createRandomDatasetViaNativeApi(String dataverseAlias, String apiToken) { return createRandomDatasetViaNativeApi(dataverseAlias, apiToken, false); From 521e8d24eaf1fb9e0ad144a336a06ed384f4a55b Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 8 Nov 2023 11:53:23 -0500 Subject: [PATCH 05/35] #9464 delete test dataverse --- 
 src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
index 14131c0fa57..c43a0c251a2 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
@@ -202,6 +202,10 @@ public void testCollectionSchema(){
                 .statusCode(OK.getStatusCode())
                 .body(containsString("Dataset schema error"));
 
+        Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
+        deleteDataverseResponse.prettyPrint();
+        assertEquals(200, deleteDataverseResponse.getStatusCode());
+
     }
 
     @Test

From 7c630f70a4e548bf51c4ed27ee1a50825d795379 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 8 Nov 2023 14:13:14 -0500
Subject: [PATCH 06/35] #9464 add release note

---
 doc/release-notes/9464-json-validation.md | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 doc/release-notes/9464-json-validation.md

diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md
new file mode 100644
index 00000000000..3703b420225
--- /dev/null
+++ b/doc/release-notes/9464-json-validation.md
@@ -0,0 +1,3 @@
+Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a Dataverse collection alias and returns a custom schema based on the required fields of the collection.
+The second takes in a Dataverse collection alias and a dataset JSON file and performs an automated validation of the JSON file against the custom schema for the collection. (Issues 9464 and 9465)
+

From 720b3b0f488482b04cd0b7f1c15d129a387bbb79 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 8 Nov 2023 15:00:28 -0500
Subject: [PATCH 07/35] add doc for get schema

---
 doc/sphinx-guides/source/api/native-api.rst | 22 +++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 1992390410c..e7e7c6fc280 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -505,6 +505,28 @@ The fully expanded example above (without environment variables) looks like this:
 
 .. note:: Previous endpoints ``$SERVER/api/dataverses/$id/metadatablocks/:isRoot`` and ``POST https://$SERVER/api/dataverses/$id/metadatablocks/:isRoot?key=$apiKey`` are deprecated, but supported.
 
+.. _get-dataset-json-schema:
+
+Retrieve a JSON schema for a Collection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Retrieves a JSON schema customized for a given Dataverse collection in order to validate a dataset JSON file prior to creating the dataset:
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export ID=root
+
+  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID//datasetSchema"
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root//datasetSchema"
+
+
 .. _create-dataset-command:

From 7be534771370f0e588920c818cf0b0e42ead4111 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 8 Nov 2023 15:32:21 -0500
Subject: [PATCH 08/35] #9464 fix typo

---
 doc/sphinx-guides/source/api/native-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index e7e7c6fc280..5d784eebd64 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -524,7 +524,7 @@ The fully expanded example above (without environment variables) looks like this:
 
 .. code-block:: bash
 
-  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root//datasetSchema"
+  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/datasetSchema"
 
 
 

From c553d1b3a1e7b69174477c48ada4952ec2bd9e3b Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Wed, 8 Nov 2023 16:15:50 -0500
Subject: [PATCH 09/35] Add permission note

---
 doc/sphinx-guides/source/api/native-api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 5d784eebd64..f170cfd53f9 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -526,7 +526,7 @@ The fully expanded example above (without environment variables) looks like this:
 
   curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/datasetSchema"
 
-
+Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint.
 
 .. _create-dataset-command:
 

From a080f84b8cd20693e14a227993a6c6c33510c020 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Thu, 9 Nov 2023 09:35:12 -0500
Subject: [PATCH 10/35] #9464 add doc for validate json

---
 doc/sphinx-guides/source/api/native-api.rst | 25 ++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index f170cfd53f9..1af98b6c20e 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -518,7 +518,7 @@ Retrieves a JSON schema customized for a given Dataverse collection in order to
   export SERVER_URL=https://demo.dataverse.org
   export ID=root
 
-  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID//datasetSchema"
+  curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/datasetSchema"
 
 The fully expanded example above (without environment variables) looks like this:
 
@@ -528,6 +528,29 @@ The fully expanded example above (without environment variables) looks like this:
 
 Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint.
 
+.. _validate-dataset-json:
+
+Validate Dataset JSON File for a Collection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Validates a dataset JSON file customized for a given Dataverse collection prior to creating the dataset:
+
+.. code-block:: bash
+
+  export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  export SERVER_URL=https://demo.dataverse.org
+  export ID=root
+
+  curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/validateDatasetJson" --upload-file dataset.json -H 'Content-type:application/json'
+
+The fully expanded example above (without environment variables) looks like this:
+
+.. code-block:: bash
+
+  curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" --upload-file dataset.json -H 'Content-type:application/json'
+
+Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint.
+
 .. _create-dataset-command:
 
 Create a Dataset in a Dataverse Collection

From 7d3836646c1fa72262acc8a1de6242277b5948d5 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Thu, 9 Nov 2023 14:23:53 -0500
Subject: [PATCH 11/35] #9464 add strings to bundle

---
 .../harvard/iq/dataverse/DataverseServiceBean.java | 13 +++++++------
 src/main/java/propertyFiles/Bundle.properties      |  3 +++
 .../edu/harvard/iq/dataverse/api/DatasetsIT.java   |  2 +-
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 407cfb343c4..bbf35535915 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -18,6 +18,7 @@
 import edu.harvard.iq.dataverse.search.IndexServiceBean;
 import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
 import edu.harvard.iq.dataverse.search.SolrSearchResult;
+import edu.harvard.iq.dataverse.util.BundleUtil;
 import edu.harvard.iq.dataverse.util.StringUtil;
 import edu.harvard.iq.dataverse.util.SystemConfig;
 import java.io.File;
@@ -1047,14 +1048,14 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) {
             Schema schema = SchemaLoader.load(rawSchema);
             schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid
         } catch (ValidationException vx) {
-            logger.info("Dataset schema error : " + vx); //without classLoader it blows up in actual deployment
-            return "Dataset schema error : " + vx.getErrorMessage();
-        } catch (Exception ex) {
-            logger.info("Dataset file error : " + ex.getLocalizedMessage());
-            return "Dataset file error : " + ex.getLocalizedMessage();
+            logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage());
+            return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage();
+        } catch (Exception ex) {
+            logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage());
+            return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage();
         }
 
-        return "The Dataset json provided is valid for this Dataverse Collection.";
+        return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded");
     }
 
     private String datasetSchemaPreface =

diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 79887f7e76c..150dd9048a1 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -2670,6 +2670,9 @@ dataverses.api.move.dataverse.error.forceMove=Please use the parameter ?forceMov
 dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial version in the dataset json
 dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api
 dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json
+dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection.
+dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error:
+dataverses.api.validate.json.exception=Validation failed with following exception:
 
 #Access.java
 access.api.allowRequests.failure.noDataset=Could not find Dataset with id: {0}

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
index c43a0c251a2..24fbcf13d23 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java
@@ -200,7 +200,7 @@ public void testCollectionSchema(){
 
         validateBadDatasetJsonResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
-                .body(containsString("Dataset schema error"));
+                .body(containsString("failed validation"));
 
         Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
         deleteDataverseResponse.prettyPrint();

From 7887a0527ddeceabd10a62bde68fcd38ed9cd824 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Thu, 9 Nov 2023 14:51:55 -0500
Subject: [PATCH 12/35] #9464 simplify commands

---
 .../engine/command/impl/GetDatasetSchemaCommand.java     | 10 ++--------
 .../engine/command/impl/ValidateDatasetJsonCommand.java  | 10 +++-------
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
index 48f135dba32..2d5e1251614 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java
@@ -31,14 +31,8 @@ public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) {
     }
 
     @Override
-    public String execute(CommandContext ctxt) throws CommandException {
-        try {
-            return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias());
-
-        } catch (Exception ex) {
-            String error = "Exception caught in getting the schema for this collection. Error: " + ex;
Error: " + ex; - throw new IllegalCommandException(error, this); - } + public String execute(CommandContext ctxt) throws CommandException { + return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java index a01aee1db9e..ae1a89c3661 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -34,12 +34,8 @@ public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, S @Override public String execute(CommandContext ctxt) throws CommandException { - try { - String retVal = ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson); - return retVal; - } catch (Exception ex) { - String error = "Exception caught in getting the schema for this collection. Error: " + ex; - return error; - } + + return ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson); + } } From 437e7ccd480dbae405238faffb9fff8a8317218d Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 13 Nov 2023 09:56:16 -0500 Subject: [PATCH 13/35] #9464 remove unused import --- src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index fabb33e328a..557b7df202b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -128,7 +128,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; From 73593acb1bcdb9ba1d62e47310753e905b2546dd Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 17 Nov 2023 15:17:28 -0500 Subject: [PATCH 14/35] #9464 query by dvo. update IT --- .../dataverse/metrics/MetricsServiceBean.java | 33 ++++++++++--------- .../harvard/iq/dataverse/api/MetricsIT.java | 14 +++++--- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..832dda5ced9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? 
"and dvobject.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -212,8 +212,9 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" + - " and dataset.harvestingclient_id is null\n" + + " and dvobject.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; @@ -225,7 +226,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dataset.harvestingclient_id IS NOT null \n" + + " AND dvobject.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -244,7 +245,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? 
"" : "JOIN dvobject ON dvobject.id = dataset.id\n") + + "JOIN dvobject ON dvobject.id = dataset.id\n" + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -258,11 +259,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -276,7 +277,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -304,7 +305,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -327,11 +328,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -350,11 +351,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + + "join dvobject on dvobject.id = dataset.id\n" + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dataset.harvestingclient_id is null\n" + + "and dvobject.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..b961a86dc0b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2018-04"; + String yyyymm = "2023-04"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -283,6 +283,12 @@ public void testGetDatasetsBySubject() { response = UtilIT.metricsDatasetsBySubject("dataLocation=local"); response.then().assertThat() .statusCode(OK.getStatusCode()); + + //Test ok when passing remote + response = UtilIT.metricsDatasetsBySubject("dataLocation=remote"); + response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test From e4ede35ea8a57afc8830dc63619bed3b660da8ff Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 20 Nov 2023 09:37:27 -0500 Subject: [PATCH 15/35] #9464 fix logger reference --- .../engine/command/impl/ValidateDatasetJsonCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java index ae1a89c3661..619740ddd89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java +++ 
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java
@@ -21,7 +21,7 @@
 @RequiredPermissions(Permission.AddDataset)
 public class ValidateDatasetJsonCommand extends AbstractCommand<String> {
 
-    private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName());
+    private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName());
 
     private final Dataverse dataverse;
     private final String datasetJson;

From 766c9c3da73a02fefa9c465c26e452cbd0d79f62 Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Mon, 20 Nov 2023 13:32:25 -0500
Subject: [PATCH 16/35] #9464 add base schema as a file

---
 .../iq/dataverse/baseDatasetSchema.json       | 123 ++++++++++++++++++
 1 file changed, 123 insertions(+)
 create mode 100644 src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json

diff --git a/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json b/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json
new file mode 100644
index 00000000000..a37e216b2ea
--- /dev/null
+++ b/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json
@@ -0,0 +1,123 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "$defs": {
+    "field": {
+      "type": "object",
+      "required": ["typeClass", "multiple", "typeName"],
+      "properties": {
+        "value": {
+          "anyOf": [
+            {
+              "type": "array"
+            },
+            {
+              "type": "string"
+            },
+            {
+              "$ref": "#/$defs/field"
+            }
+          ]
+        },
+        "typeClass": {
+          "type": "string"
+        },
+        "multiple": {
+          "type": "boolean"
+        },
+        "typeName": {
+          "type": "string"
+        }
+      }
+    }
+},
+"type": "object",
+"properties": {
+  "datasetVersion": {
+    "type": "object",
+    "properties": {
+      "license": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string"
+          },
+          "uri": {
+            "type": "string",
+            "format": "uri"
+          }
+        },
+        "required": ["name", "uri"]
+      },
+      "metadataBlocks": {
+        "type": "object",
+        "properties": {
+          "citation": {
+            "type": "object",
+            "properties": {
+              "fields": {
+                "type": "array",
+                "items": {
+                  "$ref": "#/$defs/field"
+                },
+                "minItems": 5,
+                "allOf": [
+                  {
+                    "contains": {
+                      "properties": {
+                        "typeName": {
+                          "const": "title"
+                        }
+                      }
+                    }
+                  },
+                  {
+                    "contains": {
+                      "properties": {
+                        "typeName": {
+                          "const": "author"
+                        }
+                      }
+                    }
+                  },
+                  {
+                    "contains": {
+                      "properties": {
+                        "typeName": {
+                          "const": "datasetContact"
+                        }
+                      }
+                    }
+                  },
+                  {
+                    "contains": {
+                      "properties": {
+                        "typeName": {
+                          "const": "dsDescription"
+                        }
+                      }
+                    }
+                  },
+                  {
+                    "contains": {
+                      "properties": {
+                        "typeName": {
+                          "const": "subject"
+                        }
+                      }
+                    }
+                  }
+                ]
+              }
+            },
+            "required": ["fields"]
+          }
+        },
+        "required": ["citation"]
+      }
+    },
+    "required": ["metadataBlocks"]
+  }
+},
+"required": ["datasetVersion"]
+}
+

From c82faf967d62dc7e076d59885c77548cc90e206b Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Tue, 21 Nov 2023 13:46:36 -0500
Subject: [PATCH 17/35] #9464 fix formatting

---
 .../dataverse/metrics/MetricsServiceBean.java | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
index 832dda5ced9..ff283bcda80 100644
--- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
@@ -207,17 +207,17 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio
         // A published local datasets may have more than one released version!
         // So that's why we have to jump through some extra hoops below
         // in order to select the latest one:
-        String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" +
-           "(\n" +
-           "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" +
-           " from datasetversion\n" +
-           " join dataset on dataset.id = datasetversion.dataset_id\n" +
-           " join dvobject on dataset.id = dvobject.id\n" +
-           " where versionstate='RELEASED'\n" +
-           " and dvobject.harvestingclient_id is null\n" +
-           " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" +
-           " group by dataset_id\n" +
-           "))\n";
+        String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n"
+                + "(\n"
+                + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n"
+                + " from datasetversion\n"
+                + " join dataset on dataset.id = datasetversion.dataset_id\n"
+                + " join dvobject on dataset.id = dvobject.id\n"
+                + " where versionstate='RELEASED'\n"
+                + " and dvobject.harvestingclient_id is null\n"
+                + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
+                + " group by dataset_id\n"
+                + "))\n";
 
         if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
             //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated

From 44a07a31b8baf84fd6bfd5fdb846d0976a52f1aa Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Tue, 21 Nov 2023 13:54:39 -0500
Subject: [PATCH 18/35] #9464 more code cleanup

---
 .../dataverse/metrics/MetricsServiceBean.java | 54 +++++++++----------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
index ff283bcda80..2b62c6cd9a7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java
@@ -51,7 +51,7 @@ public class MetricsServiceBean implements Serializable {
 
     /** Dataverses */
-    
+
     public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) {
         Query query = em.createNativeQuery(""
                 + "select distinct to_char(date_trunc('month', dvobject.publicationdate),'YYYY-MM') as month, count(date_trunc('month', dvobject.publicationdate))\n"
@@ -64,7 +64,7 @@ public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) {
         List<Object[]> results = query.getResultList();
         return MetricsUtil.timeSeriesToJson(results);
     }
-    
+
     /**
      * @param yyyymm Month in YYYY-MM format.
      * @param d
@@ -129,9 +129,9 @@ public List dataversesBySubject(Dataverse d) {
 
     /** Datasets */
-    
+
     public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dataverse d) {
-        Query query = em.createNativeQuery( 
+        Query query = em.createNativeQuery(
                 "select distinct date, count(dataset_id)\n"
                 + "from (\n"
                 + "select min(to_char(COALESCE(releasetime, createtime), 'YYYY-MM')) as date, dataset_id\n"
@@ -149,8 +149,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat
         List<Object[]> results = query.getResultList();
         return MetricsUtil.timeSeriesToJson(results);
     }
-    
-    
+
+
     /**
      * @param yyyymm Month in YYYY-MM format.
* @param d @@ -180,10 +180,10 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { // But do not use this notation if you need the values returned to // meaningfully identify the datasets! - + Query query = em.createNativeQuery( - - + + "select count(*)\n" + "from (\n" + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" @@ -214,7 +214,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + " join dataset on dataset.id = datasetversion.dataset_id\n" + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" - + " and dvobject.harvestingclient_id is null\n" + + " and dvobject.harvestingclient_id is null" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; @@ -313,7 +313,7 @@ public JsonArray filesTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -390,7 +390,7 @@ public JsonArray filesByType(Dataverse d) { return jab.build(); } - + public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { Query query = em.createNativeQuery("SELECT DISTINCT to_char(" + (published ? "ob.publicationdate" : "ob.createdate") + ",'YYYY-MM') as date, df.contenttype, count(df.id), coalesce(sum(df.filesize),0) " + " FROM DataFile df, DvObject ob" @@ -403,13 +403,13 @@ public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { logger.log(Level.FINE, "Metric query: {0}", query); List results = query.getResultList(); return MetricsUtil.timeSeriesByTypeToJson(results); - + } - /** Downloads + /** Downloads * @param d * @throws ParseException */ - + public JsonArray downloadsTimeSeries(Dataverse d) { // ToDo - published only? Query earlyDateQuery = em.createNativeQuery("" @@ -433,11 +433,11 @@ public JsonArray downloadsTimeSeries(Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /* * This includes getting historic download without a timestamp if query * is earlier than earliest timestamped record - * + * * @param yyyymm Month in YYYY-MM format. */ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { @@ -460,7 +460,7 @@ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { + "where (date_trunc('month', responsetime) <= to_date('" + yyyymm + "','YYYY-MM')" + "or responsetime is NULL)\n" // includes historic guestbook records without date + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" - + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") + + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") ); logger.log(Level.FINE, "Metric query: {0}", query); return (long) query.getSingleResult(); @@ -488,7 +488,7 @@ public long downloadsPastDays(int days, Dataverse d) { return (long) query.getSingleResult(); } - + public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select distinct to_char(gb.responsetime, 'YYYY-MM') as date, ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? 
"distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -502,7 +502,7 @@ public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { return MetricsUtil.timeSeriesByIDAndPIDToJson(results); } - + public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -544,7 +544,7 @@ public JsonArray uniqueDownloadsTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesByPIDToJson(results); } - + public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { //select distinct count(distinct email),dataset_id, date_trunc('month', responsetime) from guestbookresponse group by dataset_id, date_trunc('month',responsetime) order by dataset_id,date_trunc('month',responsetime); @@ -572,10 +572,10 @@ public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { return jab.build(); } - - //MDC - - + + //MDC + + public JsonArray mdcMetricTimeSeries(MetricType metricType, String country, Dataverse d) { Query query = em.createNativeQuery("SELECT distinct substring(monthyear from 1 for 7) as date, coalesce(sum(" + metricType.toString() + "),0) as count FROM DatasetMetrics\n" + ((d == null) ? "" : "WHERE dataset_id in ( " + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") @@ -747,7 +747,7 @@ public Metric getMetric(String name, String dataLocation, String dayString, Data // https://github.com/DANS-KNAW/dataverse/blob/dans-develop/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsDansServiceBean.java /** - * + * * @param dvId - parent dataverse id * @param dtype - type of object to return 'Dataverse' or 'Dataset' * @return - list of objects of specified type included in the subtree (includes parent dataverse if dtype is 'Dataverse') @@ -769,7 +769,7 @@ private String getCommaSeparatedIdStringForSubtree(Dataverse d, String dtype) { } private List getChildrenIdsRecursively(Long dvId, String dtype, DatasetVersion.VersionState versionState) { - + //Intended to be called only with dvId != null String sql = "WITH RECURSIVE querytree AS (\n" + " SELECT id, dtype, owner_id, publicationdate\n" From 7d687e9e06f9a70724650b13f91eab5a2a767e97 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 21 Nov 2023 13:59:38 -0500 Subject: [PATCH 19/35] #9464 third time's the charm? 
--- .../edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 2b62c6cd9a7..a50797fe443 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -214,8 +214,8 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + " join dataset on dataset.id = datasetversion.dataset_id\n" + " join dvobject on dataset.id = dvobject.id\n" + " where versionstate='RELEASED'\n" - + " and dvobject.harvestingclient_id is null" - + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + " and dvobject.harvestingclient_id is null" + + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + " group by dataset_id\n" + "))\n"; From 212baf2ed89f21d9d66da2b5ef6c7652c505141e Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 27 Nov 2023 14:41:43 -0500 Subject: [PATCH 20/35] #9464 return json object as api response --- .../DataverseFieldTypeInputLevel.java | 5 +- ...taverseFieldTypeInputLevelServiceBean.java | 10 ++++ .../iq/dataverse/DataverseServiceBean.java | 55 ++++++++++++++++--- .../harvard/iq/dataverse/api/Dataverses.java | 3 +- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java index c4749be0cb3..a3425987bf8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java @@ -30,8 +30,9 @@ @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdDatasetFieldTypeId", query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id = :datasetFieldTypeId"), @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdAndDatasetFieldTypeIdList", - query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList") - + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList"), + @NamedQuery(name = "DataverseFieldTypeInputLevel.findRequiredByDataverseId", + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.required = 'true' ") }) @Table(name="DataverseFieldTypeInputLevel" , uniqueConstraints={ diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java index 66c700f59ce..1bd290ecc4d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java @@ -88,6 +88,16 @@ public DataverseFieldTypeInputLevel findByDataverseIdDatasetFieldTypeId(Long dat return null; } } + + public List findRequiredByDataverseId(Long dataverseId) { + Query query = em.createNamedQuery("DataverseFieldTypeInputLevel.findRequiredByDataverseId", DataverseFieldTypeInputLevel.class); + query.setParameter("dataverseId", dataverseId); + try{ + return query.getResultList(); + } catch ( 
NoResultException nre ) {
+            return null;
+        }
+    }
 
     public void delete(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel) {
         em.remove(em.merge(dataverseFieldTypeInputLevel));
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index bbf35535915..ed46caf65a1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -21,6 +21,7 @@
 import edu.harvard.iq.dataverse.util.BundleUtil;
 import edu.harvard.iq.dataverse.util.StringUtil;
 import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
 import java.io.File;
 import java.io.IOException;
 import java.sql.Timestamp;
@@ -43,6 +44,8 @@
 import jakarta.persistence.NonUniqueResultException;
 import jakarta.persistence.PersistenceContext;
 import jakarta.persistence.TypedQuery;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.everit.json.schema.Schema;
@@ -931,12 +934,9 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) {
 
     public String getCollectionDatasetSchema(String dataverseAlias) {
-
-        List<MetadataBlock> selectedBlocks = new ArrayList<>();
-        List<DatasetFieldType> requiredDSFT = new ArrayList<>();
-
+
         Dataverse testDV = this.findByAlias(dataverseAlias);
-
+
         while (!testDV.isMetadataBlockRoot()) {
             if (testDV.getOwner() == null) {
                 break; // we are at the root, which by definition is metadata block root, regardless of the value
@@ -944,6 +944,25 @@ public String getCollectionDatasetSchema(String dataverseAlias) {
             testDV = testDV.getOwner();
         }
 
+        /* Couldn't get the "return base schema if no extra required fields" approach
+           to work with the path provided; leaving it as out of scope for now. SEK 11/27/2023
+
+        List<DataverseFieldTypeInputLevel> required = new ArrayList<>();
+
+        required = dataverseFieldTypeInputLevelService.findRequiredByDataverseId(testDV.getId());
+
+        if (required == null || required.isEmpty()){
+            String pathToJsonFile = "src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json";
+            String baseSchema = getBaseSchemaStringFromFile(pathToJsonFile);
+            if (baseSchema != null && !baseSchema.isEmpty()){
+                return baseSchema;
+            }
+        }
+
+        */
+        List<MetadataBlock> selectedBlocks = new ArrayList<>();
+        List<DatasetFieldType> requiredDSFT = new ArrayList<>();
+
         selectedBlocks.addAll(testDV.getMetadataBlocks());
 
         for (MetadataBlock mdb : selectedBlocks) {
@@ -992,9 +1011,13 @@ public String getCollectionDatasetSchema(String dataverseAlias) {
                 }
             }
         }
-
+        int countMDB = 0;
         for (MetadataBlock mdb : hasReqFields) {
+            if (countMDB > 0){
+                retval += ",";
+            }
             retval += getCustomMDBSchema(mdb, requiredDSFT);
+            countMDB++;
         }
 
         retval += "\n  }";
@@ -1037,7 +1060,7 @@ private String getCustomMDBSchema (MetadataBlock mdb, List<DatasetFieldType> requiredDSFT){
             }
         }
-
+
         return retval;
     }
@@ -1058,6 +1081,20 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) {
         return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded");
     }
 
+    static String getBaseSchemaStringFromFile(String pathToJsonFile) {
+        File datasetSchemaJson = new File(pathToJsonFile);
+        try {
+            String datasetSchemaAsJson = new String(Files.readAllBytes(Paths.get(datasetSchemaJson.getAbsolutePath())));
+            return datasetSchemaAsJson;
+        } catch (IOException ex) {
+            logger.info("IO - failed to get schema file - will build on fly " + ex.getMessage());
+            return null;
+        } catch (Exception e){
+            logger.info("Other exception - failed to get schema
file - will build on fly. " + e.getMessage()); + return null; + } + } + private String datasetSchemaPreface = "{\n" + " \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" + @@ -1140,7 +1177,7 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { " }\n" + " },\n" + " \"required\": [\"fields\"]\n" + -" },"; +" }"; private String endOfjson = ",\n" + " \"required\": [blockNames]\n" + @@ -1152,5 +1189,5 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { " \"required\": [\"datasetVersion\"]\n" + "}\n"; - + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 557b7df202b..c6ae619f36a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -258,7 +258,8 @@ public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathPara try { String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); - return ok(datasetSchema); + JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema); + return ok(jsonObject); } catch (WrappedResponse ex) { Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); return ex.getResponse(); From 93670260de18059ff75a2fa3f8ca00e395b64110 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 27 Nov 2023 14:46:30 -0500 Subject: [PATCH 21/35] #9464 revert harvesting changes made in error --- .../dataverse/metrics/MetricsServiceBean.java | 51 +++++++++---------- .../harvard/iq/dataverse/api/MetricsIT.java | 14 ++--- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index a50797fe443..1b5619c53e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -138,8 +138,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat + "from datasetversion\n" + "where versionstate='RELEASED' \n" + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? "" : "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n") - + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " : "") - + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dvobject.harvestingclient_id IS NOT NULL\n " : "") + + ((DATA_LOCATION_LOCAL.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " : "") + + ((DATA_LOCATION_REMOTE.equals(dataLocation)) ? "and dataset.harvestingclient_id IS NOT NULL\n " : "") + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + (((d == null)&&(DATA_LOCATION_ALL.equals(dataLocation))) ? 
"" : ")\n") + "group by dataset_id) as subq group by subq.date order by date;" @@ -156,11 +156,11 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat * @param d */ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { - String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(date_trunc('month', createtime) <= to_date('" + yyyymm + "','YYYY-MM') and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -189,7 +189,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" @@ -207,17 +207,16 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // A published local datasets may have more than one released version! 
// So that's why we have to jump through some extra hoops below // in order to select the latest one: - String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" - + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" - + " from datasetversion\n" - + " join dataset on dataset.id = datasetversion.dataset_id\n" - + " join dvobject on dataset.id = dvobject.id\n" - + " where versionstate='RELEASED'\n" - + " and dvobject.harvestingclient_id is null" - + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + " group by dataset_id\n" - + "))\n"; + String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" + + "(\n" + + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + + " from datasetversion\n" + + " join dataset on dataset.id = datasetversion.dataset_id\n" + + " where versionstate='RELEASED'\n" + + " and dataset.harvestingclient_id is null\n" + + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + + " group by dataset_id\n" + + "))\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated @@ -226,7 +225,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // so the query is simpler: String harvestOriginClause = "(\n" + " datasetversion.dataset_id = dataset.id\n" + - " AND dvobject.harvestingclient_id IS NOT null \n" + + " AND dataset.harvestingclient_id IS NOT null \n" + " AND date_trunc('month', datasetversion.createtime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + ")\n"; @@ -245,7 +244,7 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio + "JOIN datasetfieldtype ON datasetfieldtype.id = controlledvocabularyvalue.datasetfieldtype_id\n" + "JOIN datasetversion ON datasetversion.id = datasetfield.datasetversion_id\n" + "JOIN dataset ON dataset.id = datasetversion.dataset_id\n" - + "JOIN dvobject ON dvobject.id = dataset.id\n" + + ((d == null) ? 
"" : "JOIN dvobject ON dvobject.id = dataset.id\n") + "WHERE\n" + originClause + "AND datasetfieldtype.name = 'subject'\n" @@ -259,11 +258,11 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio } public long datasetsPastDays(int days, String dataLocation, Dataverse d) { - String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NULL)\n"; + String dataLocationLine = "(releasetime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NULL)\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL //we have to use createtime for harvest as post dvn3 harvests do not have releasetime populated - String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dvobject.harvestingclient_id IS NOT NULL)\n"; + String harvestBaseLine = "(createtime > current_date - interval '" + days + "' day and dataset.harvestingclient_id IS NOT NULL)\n"; if (DATA_LOCATION_REMOTE.equals(dataLocation)) { dataLocationLine = harvestBaseLine; // replace } else if (DATA_LOCATION_ALL.equals(dataLocation)) { @@ -277,7 +276,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED' \n" + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" @@ -305,7 +304,7 @@ public JsonArray filesTimeSeries(Dataverse d) { + "where datasetversion.id=filemetadata.datasetversion_id\n" + "and versionstate='RELEASED' \n" + "and dataset_id in (select dataset.id from dataset, dvobject where dataset.id=dvobject.id\n" - + "and dvobject.harvestingclient_id IS NULL and publicationdate is not null\n " + + "and dataset.harvestingclient_id IS NULL and publicationdate is not null\n " + ((d == null) ? ")" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + "))\n ") + "group by filemetadata.id) as subq group by subq.date order by date;"); logger.log(Level.FINE, "Metric query: {0}", query); @@ -328,11 +327,11 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); @@ -351,11 +350,11 @@ public long filesPastDays(int days, Dataverse d) { + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" - + "join dvobject on dvobject.id = dataset.id\n" + + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") + "where versionstate='RELEASED'\n" + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") - + "and dvobject.harvestingclient_id is null\n" + + "and dataset.harvestingclient_id is null\n" + "group by dataset_id \n" + ");" ); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index b961a86dc0b..e3328eefb4a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -30,7 +30,7 @@ public static void cleanUpClass() { @Test public void testGetDataversesToMonth() { - String yyyymm = "2023-04"; + String yyyymm = "2018-04"; // yyyymm = null; Response response = UtilIT.metricsDataversesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -54,7 +54,7 @@ public void testGetDataversesToMonth() { @Test public void testGetDatasetsToMonth() { - String yyyymm = "2023-04"; + String yyyymm = "2018-04"; // yyyymm = null; Response response = UtilIT.metricsDatasetsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -77,7 +77,7 @@ public void testGetDatasetsToMonth() { @Test public void testGetFilesToMonth() { - String yyyymm = "2023-04"; + String yyyymm = "2018-04"; // yyyymm = null; Response response = UtilIT.metricsFilesToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -100,7 +100,7 @@ public void testGetFilesToMonth() { @Test public void testGetDownloadsToMonth() { - String yyyymm = "2023-04"; + String yyyymm = "2018-04"; // yyyymm = null; Response response = UtilIT.metricsDownloadsToMonth(yyyymm, null); String precache = response.prettyPrint(); @@ -283,12 +283,6 @@ public void testGetDatasetsBySubject() { response = UtilIT.metricsDatasetsBySubject("dataLocation=local"); response.then().assertThat() .statusCode(OK.getStatusCode()); - - //Test ok when passing remote - response = UtilIT.metricsDatasetsBySubject("dataLocation=remote"); - response.prettyPrint(); - response.then().assertThat() - .statusCode(OK.getStatusCode()); } @Test From b7a3e7803dd6689c0060fdd2d5a2dab5e00a51ff Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 27 Nov 2023 14:29:01 -0500 Subject: [PATCH 22/35] add dataset JSON Schema to API guide, add test #9464 --- .../source/_static/api/dataset-schema.json | 123 ++++++++++++++++++ doc/sphinx-guides/source/api/native-api.rst | 2 + .../harvard/iq/dataverse/api/DatasetsIT.java | 9 ++ 3 files changed, 134 insertions(+) create mode 100644 doc/sphinx-guides/source/_static/api/dataset-schema.json diff --git a/doc/sphinx-guides/source/_static/api/dataset-schema.json b/doc/sphinx-guides/source/_static/api/dataset-schema.json new file mode 100644 index 
00000000000..a66ab43bd0f --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/dataset-schema.json @@ -0,0 +1,123 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "$defs": { + "field": { + "type": "object", + "required": ["typeClass", "multiple", "typeName"], + "properties": { + "value": { + "anyOf": [ + { + "type": "array" + }, + { + "type": "string" + }, + { + "$ref": "#/$defs/field" + } + ] + }, + "typeClass": { + "type": "string" + }, + "multiple": { + "type": "boolean" + }, + "typeName": { + "type": "string" + } + } + } +}, +"type": "object", +"properties": { + "datasetVersion": { + "type": "object", + "properties": { + "license": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["name", "uri"] + }, + "metadataBlocks": { + "type": "object", + "properties": { + "citation": { + "type": "object", + "properties": { + "fields": { + "type": "array", + "items": { + "$ref": "#/$defs/field" + }, + "minItems": 5, + "allOf": [ + { + "contains": { + "properties": { + "typeName": { + "const": "title" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "author" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "datasetContact" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "dsDescription" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "subject" + } + } + } + } + ] + } + }, + "required": ["fields"] + } + }, + "required": ["citation"] + } + }, + "required": ["metadataBlocks"] + } + }, + "required": ["datasetVersion"] +} + diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 61b1a9443b2..ca043af39e3 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -528,6 +528,8 @@ The fully expanded example above (without environment variables) looks like this Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint. +While it is recommended to download a copy of the JSON Schema from the collection (as above) to account for any fields that have been marked as required, you can also download a minimal :download:`dataset-schema.json <../_static/api/dataset-schema.json>` to get a sense of the schema when no customizations have been made. + .. 
_validate-dataset-json: Validate Dataset.json file for a Collection diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 9fe08498de9..6a746b7c5b5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -62,6 +62,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; @@ -179,6 +180,14 @@ public void testCollectionSchema(){ getCollectionSchemaResponse.prettyPrint(); getCollectionSchemaResponse.then().assertThat() .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); String expectedJson = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch1.json"); From 2d3f7aba2bc180c8d151e076a4f476d039cc27f3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 27 Nov 2023 15:45:12 -0500 Subject: [PATCH 23/35] just return the JSON Schema, don't wrap in "data, message" #9464 --- src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index c6ae619f36a..202a54a9e77 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -259,7 +259,7 @@ public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathPara try { String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema); - return ok(jsonObject); + return Response.ok(jsonObject).build(); } catch (WrappedResponse ex) { Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); return ex.getResponse(); From 0a77e2a938510d79645ad3ce5fcd349da3e8495f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 27 Nov 2023 15:53:06 -0500 Subject: [PATCH 24/35] tweak docs #9464 --- doc/sphinx-guides/source/api/native-api.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index ca043af39e3..bd49f945d4e 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -507,10 +507,10 @@ The fully expanded example above (without environment variables) looks like this .. _get-dataset-json-schema: -Retrieve a JSON schema for a Collection -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Retrieve a Dataset JSON Schema for a Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Retrieves a JSON schema customized for a given Dataverse collection in order to validate a Dataset JSON file prior to creating the dataset: +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset: .. 
code-block:: bash @@ -526,16 +526,16 @@ The fully expanded example above (without environment variables) looks like this curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/datasetSchema" -Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint. +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. While it is recommended to download a copy of the JSON Schema from the collection (as above) to account for any fields that have been marked as required, you can also download a minimal :download:`dataset-schema.json <../_static/api/dataset-schema.json>` to get a sense of the schema when no customizations have been made. .. _validate-dataset-json: -Validate Dataset.json file for a Collection +Validate Dataset JSON File for a Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Validates a Dataset json file customized for a given Dataverse collection prior to creating the dataset: +Validates a dataset JSON file customized for a given collection prior to creating the dataset: .. code-block:: bash @@ -543,15 +543,15 @@ Validates a Dataset json file customized for a given Dataverse collection prior export SERVER_URL=https://demo.dataverse.org export ID=root - curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/validateDatasetJson" --upload-file dataset.json -H 'Content-type:application/json' + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json The fully expanded example above (without environment variables) looks like this: .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" --upload-file dataset.json -H 'Content-type:application/json' + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json -Note: you must have Add Dataset permission in the given Dataverse collection to invoke this endpoint. +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. .. 
_create-dataset-command: From 7db36293364118244e0582c3d529a34cd5b66395 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 27 Nov 2023 15:57:24 -0500 Subject: [PATCH 25/35] removing trailing newline #9464 --- doc/sphinx-guides/source/_static/api/dataset-schema.json | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/sphinx-guides/source/_static/api/dataset-schema.json b/doc/sphinx-guides/source/_static/api/dataset-schema.json index a66ab43bd0f..34b8a1eeedb 100644 --- a/doc/sphinx-guides/source/_static/api/dataset-schema.json +++ b/doc/sphinx-guides/source/_static/api/dataset-schema.json @@ -120,4 +120,3 @@ }, "required": ["datasetVersion"] } - From 194945b36bd052569e6699c53a78de839d2c1f23 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Nov 2023 09:16:50 -0500 Subject: [PATCH 26/35] remove cruft (unused) #9464 --- .../iq/dataverse/baseDatasetSchema.json | 123 ------------------ 1 file changed, 123 deletions(-) delete mode 100644 src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json diff --git a/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json b/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json deleted file mode 100644 index a37e216b2ea..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-04/schema#", - "$defs": { - "field": { - "type": "object", - "required": ["typeClass", "multiple", "typeName"], - "properties": { - "value": { - "anyOf": [ - { - "type": "array" - }, - { - "type": "string" - }, - { - "$ref": "#/$defs/field" - } - ] - }, - "typeClass": { - "type": "string" - }, - "multiple": { - "type": "boolean" - }, - "typeName": { - "type": "string" - } - } - } -}, -"type": "object", -"properties": { - "datasetVersion": { - "type": "object", - "properties": { - "license": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "uri": { - "type": "string", - "format": "uri" - } - }, - "required": ["name", "uri"] - }, - "metadataBlocks": { - "type": "object", - "properties": { - "citation": { - "type": "object", - "properties": { - "fields": { - "type": "array", - "items": { - "$ref": "#/$defs/field" - }, - "minItems": 5, - "allOf": [ - { - "contains": { - "properties": { - "typeName": { - "const": "title" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "author" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "datasetContact" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "dsDescription" - } - } - } - }, - { - "contains": { - "properties": { - "typeName": { - "const": "subject" - } - } - } - } - ] - } - }, - "required": ["fields"] - } - }, - "required": ["citation"] - } - }, - "required": ["metadataBlocks"] - } - }, - "required": ["datasetVersion"] -} - From c1bd009b8d2ebc297a51e6d23358ab76137c3848 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Nov 2023 09:18:57 -0500 Subject: [PATCH 27/35] format code (no-op) #9464 --- .../java/edu/harvard/iq/dataverse/api/UtilIT.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index b3f8d639721..cd05719402f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -406,26 +406,22 @@ static Response getGuestbookResponses(String 
dataverseAlias, Long guestbookId, S } return requestSpec.get("/api/dataverses/" + dataverseAlias + "/guestbookResponses/"); } - - static Response getCollectionSchema (String dataverseAlias, String apiToken){ - + + static Response getCollectionSchema(String dataverseAlias, String apiToken) { Response getCollectionSchemaResponse = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json") .get("/api/dataverses/" + dataverseAlias + "/datasetSchema"); return getCollectionSchemaResponse; - } - - static Response validateDatasetJson (String dataverseAlias, String datasetJson, String apiToken){ - + + static Response validateDatasetJson(String dataverseAlias, String datasetJson, String apiToken) { Response getValidateDatasetJsonResponse = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .body(datasetJson) .contentType("application/json") .post("/api/dataverses/" + dataverseAlias + "/validateDatasetJson"); return getValidateDatasetJsonResponse; - } static Response createRandomDatasetViaNativeApi(String dataverseAlias, String apiToken) { From c4d9b6e4a9741d07cc9193e794455c35a08320fd Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Nov 2023 09:26:34 -0500 Subject: [PATCH 28/35] add new endpoints to API changelog #9464 --- doc/sphinx-guides/source/api/changelog.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 1726736e75c..dd2d129a420 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -8,6 +8,11 @@ API Changelog 6.1 --- +New +~~~ +- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. +- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. + Changes ~~~~~~~ - **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. From 45df764c3b9c8d8f504f1134fb302bb4ea10a6bd Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Nov 2023 09:28:30 -0500 Subject: [PATCH 29/35] tweak release note #9464 --- doc/release-notes/9464-json-validation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md index 3703b420225..4b08f2ca9dd 100644 --- a/doc/release-notes/9464-json-validation.md +++ b/doc/release-notes/9464-json-validation.md @@ -1,3 +1,3 @@ -Functionality has been added to help validate dataset json prior to dataset creation. There are two new API endpoints in this release. The first takes in a Dataverse Collection alias and returns a custom schema based on the required fields of the collection. -The second takes in a Dataverse collection alias and a dataset json file and does an automated validation of the json file against the custom schema for the collection. (Issue 9464 and 9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. 
(Issue #9464 and #9465) +For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From d8e327d8c56793f24213385b9dcca864b2a134e3 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 28 Nov 2023 09:46:16 -0500 Subject: [PATCH 30/35] add "v" to make anchor links meaningful #9464 #10060 Anchors like #v6.1 are much better than #id1, which is meaningless. --- doc/sphinx-guides/source/api/changelog.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index dd2d129a420..cbf1cb329b9 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,8 +5,8 @@ API Changelog :local: :depth: 1 -6.1 ---- +v6.1 +---- New ~~~ @@ -17,8 +17,8 @@ Changes ~~~~~~~ - **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. -6.0 ---- +v6.0 +---- Changes ~~~~~~~ From 866b5eaa1e7108a483c87b0a2a6b532e1f7ef5c1 Mon Sep 17 00:00:00 2001 From: Juan Pablo Tosca Villanueva Date: Tue, 28 Nov 2023 16:16:40 -0500 Subject: [PATCH 31/35] Adds -X POST on the docs for validateDatasetJson --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index bd49f945d4e..1f0c5a62d12 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -549,7 +549,7 @@ The fully expanded example above (without environment variables) looks like this .. code-block:: bash - curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. 
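
Editorial sketch bridging into the next two commits, which expand the validation error message: everit-json-schema reports nested failures through ValidationException.getCausingExceptions(), and the commits below flatten those messages by hand. The library also exposes a getAllMessages() accessor for the same purpose; treat that accessor as an assumption to verify against the bundled library version. A minimal, self-contained illustration:

    import java.util.List;
    import org.everit.json.schema.Schema;
    import org.everit.json.schema.ValidationException;
    import org.everit.json.schema.loader.SchemaLoader;
    import org.json.JSONObject;

    public class ValidationSketch {
        // Returns every violation message rather than only the top-level summary.
        static List<String> validate(String schemaJson, String datasetJson) {
            Schema schema = SchemaLoader.load(new JSONObject(schemaJson));
            try {
                schema.validate(new JSONObject(datasetJson)); // throws if invalid
                return List.of();
            } catch (ValidationException vx) {
                // getAllMessages() flattens the tree of causing exceptions.
                return vx.getAllMessages();
            }
        }
    }
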
From 547d71c342e08ebdf674d8754dc072465ad20651 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 14:31:07 -0500 Subject: [PATCH 32/35] #9464 add more detail to validation error message --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index ed46caf65a1..027e58d9263 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1072,7 +1072,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid } catch (ValidationException vx) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From 7697157ac98049dea45a2bd98193aad75e6037e1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 4 Dec 2023 15:27:21 -0500 Subject: [PATCH 33/35] #9464 handle single errors --- .../edu/harvard/iq/dataverse/DataverseServiceBean.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 027e58d9263..07e7fe615e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -1077,7 +1077,12 @@ public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { accumulatedexceptions = accumulatedexceptions + va; accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); } - return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + } catch (Exception ex) { logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); From c54a85fca9377b74efc0e74e8a70a6de2f6fccc4 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 5 Dec 2023 14:52:23 -0500 Subject: [PATCH 34/35] #9464 add caveats to release note. 
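
Editorial sketch (not part of this patch) of the end-to-end workflow the release note describes, for readers who prefer code to curl: fetch the collection-specific schema, then ask the server to validate a dataset file before attempting creation. Host, collection alias, token, and file name are placeholders; the endpoint paths come from the native API docs in this PR:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.file.Path;

    public class ValidateBeforeCreate {
        public static void main(String[] args) throws Exception {
            HttpClient client = HttpClient.newHttpClient();
            String base = "https://demo.dataverse.org/api/dataverses/root"; // placeholder
            String token = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";          // placeholder

            // 1. Fetch the schema customized for this collection's required fields.
            HttpRequest getSchema = HttpRequest.newBuilder()
                    .uri(URI.create(base + "/datasetSchema"))
                    .header("X-Dataverse-key", token)
                    .GET()
                    .build();
            System.out.println(client.send(getSchema, HttpResponse.BodyHandlers.ofString()).body());

            // 2. Server-side validation of dataset.json against that schema.
            HttpRequest validate = HttpRequest.newBuilder()
                    .uri(URI.create(base + "/validateDatasetJson"))
                    .header("X-Dataverse-key", token)
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofFile(Path.of("dataset.json")))
                    .build();
            System.out.println(client.send(validate, HttpResponse.BodyHandlers.ofString()).body());
        }
    }
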
---
 doc/release-notes/9464-json-validation.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md
index 4b08f2ca9dd..f104263ba35 100644
--- a/doc/release-notes/9464-json-validation.md
+++ b/doc/release-notes/9464-json-validation.md
@@ -1,3 +1,3 @@
-Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. (Issue #9464 and #9465)
+Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release, functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465)
 
 For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html

From 2379828c2737260901b23020a436f5cab6cc962a Mon Sep 17 00:00:00 2001
From: Stephen Kraffmiller
Date: Tue, 5 Dec 2023 15:05:12 -0500
Subject: [PATCH 35/35] Update native-api.rst

---
 doc/sphinx-guides/source/api/native-api.rst | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst
index 2d37c3b07ae..29aa7c880ac 100644
--- a/doc/sphinx-guides/source/api/native-api.rst
+++ b/doc/sphinx-guides/source/api/native-api.rst
@@ -510,7 +510,9 @@ The fully expanded example above (without environment variables) looks like this
 Retrieve a Dataset JSON Schema for a Collection
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset:
+Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This
+first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled
+vocabulary and more robust dataset field format testing:
 
 .. code-block:: bash
 
@@ -535,7 +537,8 @@ Validate Dataset JSON File for a Collection
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Validates a dataset JSON file customized for a given collection prior to creating the dataset:
+Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for JSON formatting
+and the presence of required elements:
 
 .. code-block:: bash
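
Closing editorial sketch to make the "required elements only" caveat concrete: validating a deliberately incomplete dataset JSON against the baseline schema file these patches add under doc/sphinx-guides. It assumes the everit-json-schema and org.json dependencies the server code already uses, and that the path is readable from the repository root:

    import java.nio.file.Files;
    import java.nio.file.Path;
    import org.everit.json.schema.Schema;
    import org.everit.json.schema.ValidationException;
    import org.everit.json.schema.loader.SchemaLoader;
    import org.json.JSONObject;

    public class SchemaSmokeTest {
        public static void main(String[] args) throws Exception {
            // Baseline schema shipped with this PR.
            String schemaJson = Files.readString(
                    Path.of("doc/sphinx-guides/source/_static/api/dataset-schema.json"));
            Schema schema = SchemaLoader.load(new JSONObject(schemaJson));

            // A dataset missing the required metadataBlocks key entirely.
            String dataset = """
                    {"datasetVersion": {"license": {"name": "CC0 1.0",
                     "uri": "http://creativecommons.org/publicdomain/zero/1.0"}}}
                    """;
            try {
                schema.validate(new JSONObject(dataset));
                System.out.println("valid");
            } catch (ValidationException vx) {
                // Expect a complaint about the missing metadataBlocks key; field
                // types and controlled vocabulary are not checked, per the caveat.
                vx.getAllMessages().forEach(System.out::println);
            }
        }
    }
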