Skip to content

Commit

Permalink
Merge pull request IQSS#4252 from IQSS/2243-schema.org-json-ld
Browse files Browse the repository at this point in the history
2243 schema.org json ld
  • Loading branch information
kcondon authored Nov 20, 2017
2 parents 0918fae + a756751 commit c67a39f
Show file tree
Hide file tree
Showing 6 changed files with 361 additions and 30 deletions.
60 changes: 33 additions & 27 deletions src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import edu.harvard.iq.dataverse.datasetutility.TwoRavensHelper;
import edu.harvard.iq.dataverse.datasetutility.WorldMapPermissionHelper;
import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand;
import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand;
import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult;
import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand;
Expand Down Expand Up @@ -3944,23 +3945,6 @@ public String getDescription() {
return workingVersion.getDescriptionPlainText();
}

/**
* dataset publication date unpublished datasets will return an empty
* string.
*
* @return String dataset publication date (dd MMM yyyy).
*/
public String getPublicationDate() {
assert (null != workingVersion);
if (DatasetVersion.VersionState.DRAFT == workingVersion.getVersionState()) {
return "";
}
Date rel_date = workingVersion.getReleaseTime();
SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
String r = fmt.format(rel_date.getTime());
return r;
}

/**
* dataset authors
*
Expand All @@ -3971,16 +3955,6 @@ public List<String> getDatasetAuthors() {
return workingVersion.getDatasetAuthorNames();
}

/**
* dataset subjects
*
* @return array of String containing the subjects for a page
*/
public List<String> getDatasetSubjects() {
assert (null != workingVersion);
return workingVersion.getDatasetSubjects();
}

/**
* publisher (aka - name of root dataverse)
*
Expand Down Expand Up @@ -4066,4 +4040,36 @@ public List<DatasetField> getDatasetSummaryFields() {
return DatasetUtil.getDatasetSummaryFields(workingVersion, customFields);
}

Boolean thisLatestReleasedVersion = null;

public boolean isThisLatestReleasedVersion() {
if (thisLatestReleasedVersion != null) {
return thisLatestReleasedVersion;
}

if (!workingVersion.isPublished()) {
thisLatestReleasedVersion = false;
return false;
}

DatasetVersion latestPublishedVersion = null;
Command<DatasetVersion> cmd = new GetLatestPublishedDatasetVersionCommand(dvRequestService.getDataverseRequest(), dataset);
try {
latestPublishedVersion = commandEngine.submit(cmd);
} catch (Exception ex) {
// whatever...
}

thisLatestReleasedVersion = workingVersion.equals(latestPublishedVersion);

return thisLatestReleasedVersion;

}

public String getJsonLd() {
if (isThisLatestReleasedVersion()) {
return workingVersion.getJsonLd();
}
return "";
}
}
264 changes: 263 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import edu.harvard.iq.dataverse.util.MarkupChecker;
import edu.harvard.iq.dataverse.DatasetFieldType.FieldType;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import edu.harvard.iq.dataverse.workflows.WorkflowComment;
import java.io.Serializable;
import java.math.BigDecimal;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
Expand All @@ -17,6 +19,9 @@
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
Expand Down Expand Up @@ -142,6 +147,9 @@ public enum License {

@Transient
private String contributorNames;

@Transient
private String jsonLd;

@OneToMany(mappedBy="datasetVersion", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST})
private List<DatasetVersionUser> datasetVersionUsers;
Expand Down Expand Up @@ -417,6 +425,10 @@ public boolean isReleased() {
return versionState.equals(VersionState.RELEASED);
}

public boolean isPublished() {
return isReleased();
}

public boolean isDraft() {
return versionState.equals(VersionState.DRAFT);
}
Expand Down Expand Up @@ -706,6 +718,42 @@ public List<DatasetAuthor> getDatasetAuthors() {
return retList;
}

public List<String> getTimePeriodsCovered() {
List <String> retList = new ArrayList<>();
for (DatasetField dsf : this.getDatasetFields()) {
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.timePeriodCovered)) {
for (DatasetFieldCompoundValue timePeriodValue : dsf.getDatasetFieldCompoundValues()) {
String start = "";
String end = "";
for (DatasetField subField : timePeriodValue.getChildDatasetFields()) {
if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.timePeriodCoveredStart)) {
if (subField.isEmptyForDisplay()) {
start = null;
} else {
// we want to use "getValue()", as opposed to "getDisplayValue()" here -
// as the latter method prepends the value with the word "Start:"!
start = subField.getValue();
}
}
if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.timePeriodCoveredEnd)) {
if (subField.isEmptyForDisplay()) {
end = null;
} else {
// see the comment above
end = subField.getValue();
}
}

}
if (start != null && end != null) {
retList.add(start + "/" + end);
}
}
}
}
return retList;
}

/**
* @return List of Strings containing the names of the authors.
*/
Expand All @@ -729,7 +777,55 @@ public List<String> getDatasetSubjects() {
}
return subjects;
}


/**
* @return List of Strings containing the version's Topic Classifications
*/
public List<String> getTopicClassifications() {
return getCompoundChildFieldValues(DatasetFieldConstant.topicClassification, DatasetFieldConstant.topicClassValue);
}

/**
* @return List of Strings containing the version's Keywords
*/
public List<String> getKeywords() {
return getCompoundChildFieldValues(DatasetFieldConstant.keyword, DatasetFieldConstant.keywordValue);
}

/**
* @return List of Strings containing the version's PublicationCitations
*/
public List<String> getPublicationCitationValues() {
return getCompoundChildFieldValues(DatasetFieldConstant.publication, DatasetFieldConstant.publicationCitation);
}

/**
* @param parentFieldName compound dataset field A (from DatasetFieldConstant.*)
* @param childFieldName dataset field B, child field of A (from DatasetFieldConstant.*)
* @return List of values of the child field
*/
public List<String> getCompoundChildFieldValues(String parentFieldName, String childFieldName) {
List<String> keywords = new ArrayList<>();
for (DatasetField dsf : this.getDatasetFields()) {
if (dsf.getDatasetFieldType().getName().equals(parentFieldName)) {
for (DatasetFieldCompoundValue keywordFieldValue : dsf.getDatasetFieldCompoundValues()) {
for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) {
if (subField.getDatasetFieldType().getName().equals(childFieldName)) {
String keyword = subField.getValue();
// Field values should NOT be empty or, especially, null,
// - in the ideal world. But as we are realizing, they CAN
// be null in real life databases. So, a check, just in case:
if (!StringUtil.isEmpty(keyword)) {
keywords.add(subField.getValue());
}
}
}
}
}
}
return keywords;
}

public String getDatasetProducersString(){
String retVal = "";
for (DatasetField dsf : this.getDatasetFields()) {
Expand Down Expand Up @@ -1099,4 +1195,170 @@ public List<WorkflowComment> getWorkflowComments() {
return workflowComments;
}

/**
* dataset publication date unpublished datasets will return an empty
* string.
*
* @return String dataset publication date in ISO 8601 format (yyyy-MM-dd).
*/
public String getPublicationDateAsString() {
if (DatasetVersion.VersionState.DRAFT == this.getVersionState()) {
return "";
}
Date rel_date = this.getReleaseTime();
SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd");
String r = fmt.format(rel_date.getTime());
return r;
}

// TODO: Make this more performant by writing the output to the database or a file?
// Agree - now that this has grown into a somewhat complex chunk of formatted
// metadata - and not just a couple of values inserted into the page html -
// it feels like it would make more sense to treat it as another supported
// export format, that can be produced once and cached.
// The problem with that is that the export subsystem assumes there is only
// one metadata export in a given format per dataset (it uses the current
// released (published) version. This JSON fragment is generated for a
// specific released version - and we can have multiple released versions.
// So something will need to be modified to accommodate this. -- L.A.

public String getJsonLd() {
// We show published datasets only for "datePublished" field below.
if (!this.isPublished()) {
return "";
}

if (jsonLd != null) {
return jsonLd;
}
JsonObjectBuilder job = Json.createObjectBuilder();
job.add("@context", "http://schema.org");
job.add("@type", "Dataset");
job.add("identifier", this.getDataset().getPersistentURL());
job.add("name", this.getTitle());
JsonArrayBuilder authors = Json.createArrayBuilder();
for (DatasetAuthor datasetAuthor : this.getDatasetAuthors()) {
JsonObjectBuilder author = Json.createObjectBuilder();
String name = datasetAuthor.getName().getValue();
String affiliation = datasetAuthor.getAffiliation().getValue();
// We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization".
//author.add("@type", "Person");
author.add("name", name);
if (!StringUtil.isEmpty(affiliation)) {
author.add("affiliation", affiliation);
}
authors.add(author);
}
job.add("author", authors);
/**
* We are aware that there is a "datePublished" field but it means "Date
* of first broadcast/publication." This only makes sense for a 1.0
* version.
*/
String datePublished = this.getDataset().getPublicationDateFormattedYYYYMMDD();
if (datePublished != null) {
job.add("datePublished", datePublished);
}

/**
* "dateModified" is more appropriate for a version: "The date on which
* the CreativeWork was most recently modified or when the item's entry
* was modified within a DataFeed."
*/
job.add("dateModified", this.getPublicationDateAsString());
job.add("version", this.getVersionNumber().toString());
job.add("description", this.getDescriptionPlainText());
/**
* "keywords" - contains subject(s), datasetkeyword(s) and topicclassification(s)
* metadata fields for the version. -- L.A.
* (see #2243 for details/discussion/feedback from Google)
*/
JsonArrayBuilder keywords = Json.createArrayBuilder();

for (String subject : this.getDatasetSubjects()) {
keywords.add(subject);
}

for (String topic : this.getTopicClassifications()) {
keywords.add(topic);
}

for (String keyword : this.getKeywords()) {
keywords.add(keyword);
}

job.add("keywords", keywords);

/**
* citation:
* (multiple) publicationCitation values, if present:
*/

List<String> publicationCitations = getPublicationCitationValues();
if (publicationCitations.size() > 0) {
JsonArrayBuilder citation = Json.createArrayBuilder();
for (String pubCitation : publicationCitations) {
//citationEntry.add("@type", "Dataset");
//citationEntry.add("text", pubCitation);
citation.add(pubCitation);
}
job.add("citation", citation);
}

/**
* temporalCoverage:
* (if available)
*/

List<String> timePeriodsCovered = this.getTimePeriodsCovered();
if (timePeriodsCovered.size() > 0) {
JsonArrayBuilder temporalCoverage = Json.createArrayBuilder();
for (String timePeriod : timePeriodsCovered) {
temporalCoverage.add(timePeriod);
}
job.add("temporalCoverage", temporalCoverage);
}

/**
* spatialCoverage (if available)
* TODO
* (punted, for now - see #2243)
*
*/

/**
* funder (if available)
* TODO
* (punted, for now - see #2243)
*/

job.add("schemaVersion", "https://schema.org/version/3.3");

TermsOfUseAndAccess terms = this.getTermsOfUseAndAccess();
if (terms != null) {
JsonObjectBuilder license = Json.createObjectBuilder().add("@type", "Dataset");

if (TermsOfUseAndAccess.License.CC0.equals(terms.getLicense())) {
license.add("text", "CC0").add("url", "https://creativecommons.org/publicdomain/zero/1.0/");
} else {
license.add("text", terms.getTermsOfUse());
}

job.add("license",license);
}

job.add("includedInDataCatalog", Json.createObjectBuilder()
.add("@type", "DataCatalog")
.add("name", this.getRootDataverseNameforCitation())
.add("url", SystemConfig.getDataverseSiteUrlStatic())
);

job.add("provider", Json.createObjectBuilder()
.add("@type", "Organization")
.add("name", "Dataverse")
);
jsonLd = job.build().toString();
return jsonLd;
}

}
Loading

0 comments on commit c67a39f

Please sign in to comment.