From b703e27ec2d785adff459d3ff806e10344a2ef06 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 20 Sep 2016 13:45:21 -0400 Subject: [PATCH] Support SHA1 rather than MD5 as a checksum on a per file basis #3354 A dependency for rsync support (#3145) is the ability to persist SHA-1 checksums for files rather than MD5 checksums. A new installation-wide configuration setting called ":FileFixityChecksumAlgorithm" has been added which can be set to "SHA-1" to have Dataverse calculate and show SHA-1 checksums rather than MD5 checksums. In order to run this branch you must run the provided SQL upgrade script: scripts/database/upgrades/3354-alt-checksum.sql In addition, the Solr schema should be updated to the version in this branch. --- conf/solr/4.6.0/schema.xml | 2 + .../source/installation/config.rst | 7 ++ .../database/upgrades/3354-alt-checksum.sql | 7 ++ .../issues/3354/createDatasetWithSha1Files.sh | 5 ++ scripts/issues/3354/datasetWithSha1Files.json | 86 +++++++++++++++++++ scripts/issues/3354/mydata | 3 + scripts/search/search | 4 +- src/main/java/Bundle.properties | 7 +- .../edu/harvard/iq/dataverse/DataFile.java | 80 +++++++++++++++-- .../iq/dataverse/DataFileServiceBean.java | 32 +++++-- .../edu/harvard/iq/dataverse/DatasetPage.java | 4 +- .../dataverse/DatasetVersionDifference.java | 47 +++++++--- .../iq/dataverse/EditDatafilesPage.java | 4 +- .../harvard/iq/dataverse/api/Dataverses.java | 5 +- .../iq/dataverse/api/WorldMapRelatedData.java | 2 +- .../dataverse/ingest/IngestServiceBean.java | 23 +++-- .../iq/dataverse/mydata/DataRetrieverAPI.java | 12 +-- .../iq/dataverse/search/IndexServiceBean.java | 11 ++- .../iq/dataverse/search/SearchFields.java | 2 + .../search/SearchIncludeFragment.java | 8 +- .../dataverse/search/SearchServiceBean.java | 7 ++ .../iq/dataverse/search/SolrSearchResult.java | 37 +++++++- .../settings/SettingsServiceBean.java | 1 + .../harvard/iq/dataverse/util/FileUtil.java | 52 +++++++++++ .../iq/dataverse/util/MD5Checksum.java | 84 
------------------ .../iq/dataverse/util/SystemConfig.java | 13 +++ .../iq/dataverse/util/json/JsonParser.java | 41 +++++++-- .../iq/dataverse/util/json/JsonPrinter.java | 27 +++++- src/main/webapp/dataset.xhtml | 2 +- src/main/webapp/editFilesFragment.xhtml | 10 +-- src/main/webapp/file.xhtml | 13 +-- src/main/webapp/filesFragment.xhtml | 2 +- src/main/webapp/mydata_templates/cards.html | 1 + .../mydata_templates/cards_minimum.html | 2 +- src/main/webapp/search-include-fragment.xhtml | 2 +- .../harvard/iq/dataverse/api/DatasetsIT.java | 76 ++++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 +++ 37 files changed, 567 insertions(+), 164 deletions(-) create mode 100644 scripts/database/upgrades/3354-alt-checksum.sql create mode 100755 scripts/issues/3354/createDatasetWithSha1Files.sh create mode 100644 scripts/issues/3354/datasetWithSha1Files.json create mode 100755 scripts/issues/3354/mydata delete mode 100644 src/main/java/edu/harvard/iq/dataverse/util/MD5Checksum.java diff --git a/conf/solr/4.6.0/schema.xml b/conf/solr/4.6.0/schema.xml index 10f9b07be5c..323429b62da 100644 --- a/conf/solr/4.6.0/schema.xml +++ b/conf/solr/4.6.0/schema.xml @@ -298,6 +298,8 @@ + + diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 142e69ff3e3..b1f13c0ec8e 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -454,3 +454,10 @@ This setting is experimental per :doc:`/installation/shibboleth`. ++++++++++++ Set to false to disallow local accounts to be created if you are using :doc:`shibboleth` but not for production use until https://github.com/IQSS/dataverse/issues/2838 has been fixed. + +:FileFixityChecksumAlgorithm +++++++++++++++++++++++++++++ + +Dataverse calculates checksums for uploaded files so that users can determine if their file was corrupted via upload or download. 
This is sometimes called "file fixity": https://en.wikipedia.org/wiki/File_Fixity + +The default checksum algorithm used is MD5 and should be sufficient for establishing file fixity. "SHA-1" is an experimental alternate value for this setting. diff --git a/scripts/database/upgrades/3354-alt-checksum.sql b/scripts/database/upgrades/3354-alt-checksum.sql new file mode 100644 index 00000000000..0c54d673aac --- /dev/null +++ b/scripts/database/upgrades/3354-alt-checksum.sql @@ -0,0 +1,7 @@ +ALTER TABLE datafile ADD COLUMN checksumtype character varying(255); +UPDATE datafile SET checksumtype = 'MD5'; +ALTER TABLE datafile ALTER COLUMN checksumtype SET NOT NULL; +-- alternate statement for sbgrid.org and others interested in SHA-1 support +-- note that in the database we use "SHA1" (no hyphen) but the GUI will show "SHA-1" +--UPDATE datafile SET checksumtype = 'SHA1'; +ALTER TABLE datafile RENAME md5 TO checksumvalue; diff --git a/scripts/issues/3354/createDatasetWithSha1Files.sh b/scripts/issues/3354/createDatasetWithSha1Files.sh new file mode 100755 index 00000000000..1792a9e33a3 --- /dev/null +++ b/scripts/issues/3354/createDatasetWithSha1Files.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# existing, works, no files, commenting out +#curl -s -X POST -H "Content-type:application/json" -d @scripts/search/tests/data/dataset-finch1.json "http://localhost:8080/api/dataverses/root/datasets/?key=$API_TOKEN" +# new, has files +curl -s -X POST -H "Content-type:application/json" -d @scripts/issues/3354/datasetWithSha1Files.json "http://localhost:8080/api/dataverses/root/datasets/?key=$API_TOKEN" diff --git a/scripts/issues/3354/datasetWithSha1Files.json b/scripts/issues/3354/datasetWithSha1Files.json new file mode 100644 index 00000000000..95a4d3b88d0 --- /dev/null +++ b/scripts/issues/3354/datasetWithSha1Files.json @@ -0,0 +1,86 @@ +{ + "datasetVersion": { + "files": [ + { + "label": "foo.txt", + "dataFile": { + "filename": "foo.txt", + "contentType": "text/plain", + "storageIdentifier": 
"157484f9d6c-c36006fa39e5", + "originalFormatLabel": "UNKNOWN", + "checksum": { + "type": "SHA-1", + "value": "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15" + } + } + } + ], + "metadataBlocks": { + "citation": { + "fields": [ + { + "value": "Dataset with SHA-1 files", + "typeClass": "primitive", + "multiple": false, + "typeName": "title" + }, + { + "value": [ + { + "authorName": { + "value": "Finch, Fiona", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorName" + }, + "authorAffiliation": { + "value": "Birds Inc.", + "typeClass": "primitive", + "multiple": false, + "typeName": "authorAffiliation" + } + } + ], + "typeClass": "compound", + "multiple": true, + "typeName": "author" + }, + { + "value": [ + { "datasetContactEmail" : { + "typeClass": "primitive", + "multiple": false, + "typeName": "datasetContactEmail", + "value" : "finch@mailinator.com" + } + }], + "typeClass": "compound", + "multiple": true, + "typeName": "datasetContact" + }, + { + "value": [ { + "dsDescriptionValue":{ + "value": "Some people prefer SHA-1 to MD5 for file fixity.", + "multiple":false, + "typeClass": "primitive", + "typeName": "dsDescriptionValue" + }}], + "typeClass": "compound", + "multiple": true, + "typeName": "dsDescription" + }, + { + "value": [ + "Other" + ], + "typeClass": "controlledVocabulary", + "multiple": true, + "typeName": "subject" + } + ], + "displayName": "Citation Metadata" + } + } + } +} diff --git a/scripts/issues/3354/mydata b/scripts/issues/3354/mydata new file mode 100755 index 00000000000..eb76d06e3f9 --- /dev/null +++ b/scripts/issues/3354/mydata @@ -0,0 +1,3 @@ +#!/bin/sh +# FIXME: Make this into a REST Assured test. 
+curl -s "http://localhost:8080/api/mydata/retrieve?key=$API_TOKEN&role_ids=1&dvobject_types=DataFile&published_states=Published&published_states=Unpublished&published_states=Draft&published_states=In+Review&published_states=Deaccessioned" | jq .data.items diff --git a/scripts/search/search b/scripts/search/search index b058dbdacb9..ac14596ac2a 100755 --- a/scripts/search/search +++ b/scripts/search/search @@ -1,11 +1,11 @@ #!/bin/sh if [ -z "$1" ]; then - curl -s 'http://localhost:8080/api/search?q=*' + curl -H "X-Dataverse-key: $API_TOKEN" -s 'http://localhost:8080/api/search?q=*' #curl -s 'http://localhost:8080/api/search?q=*&key=pete' else # i.e. ./search 'q=*&fq=filetype_s:"image"&fq=dvtype:files' # i.e. ./search 'q=*&start=10' # i.e. ./search 'q=*&sort=name_sort&order=asc' # i.e. ./search 'q=*&sort=name_sort&order=asc' | jq '.itemsJson[] | {name_sort}' - curl -s "http://localhost:8080/api/search?$1" + curl -H "X-Dataverse-key: $API_TOKEN" -s "http://localhost:8080/api/search?$1" fi diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index bc39855adc6..2a1a04e37b0 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -1076,7 +1076,6 @@ dataset.metadata.persistentId.tip=The unique persistent identifier for a Dataset dataset.versionDifferences.termsOfUseAccess=Terms of Use and Access dataset.versionDifferences.termsOfUseAccessChanged=Terms of Use/Access Changed file.viewDiffDialog.restricted=Restricted -file.viewDiffDialog.md5=MD5 dataset.template.tip=Changing the template will clear any fields you may have entered data into. dataset.noTemplate.label=None @@ -1133,9 +1132,8 @@ file.download.header=Download file.preview=Preview: file.fileName=File Name file.type.tabularData=Tabular Data -file.MD5=MD5 -file.MD5.origal=Original File MD5 -file.MD5.exists.tip=A file with this MD5 already exists in the dataset. 
+file.originalChecksumType=Original File {0} +file.checksum.exists.tip=A file with this checksum already exists in the dataset. file.selectedThumbnail=Thumbnail file.selectedThumbnail.tip=The thumbnail for this file is used as the default thumbnail for the dataset. Click 'Advanced Options' button of another file to select that file. @@ -1371,7 +1369,6 @@ file.tags.label=Tags file.metadataTab.fileMetadata.header=File Metadata file.metadataTab.fileMetadata.persistentid.label=Data File Persistent ID -file.metadataTab.fileMetadata.md5.label=MD5 file.metadataTab.fileMetadata.unf.label=UNF file.metadataTab.fileMetadata.size.label=Size file.metadataTab.fileMetadata.type.label=Type diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 24c01e3b107..7cfca585054 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.dataaccess.DataFileIO; import edu.harvard.iq.dataverse.ingest.IngestReport; import edu.harvard.iq.dataverse.ingest.IngestRequest; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.ShapefileHandler; import java.io.IOException; @@ -16,11 +17,14 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.Files; +import java.util.Arrays; import javax.persistence.Entity; import javax.persistence.OneToMany; import javax.persistence.OneToOne; import javax.persistence.CascadeType; import javax.persistence.Column; +import javax.persistence.EnumType; +import javax.persistence.Enumerated; import javax.persistence.Index; import javax.persistence.JoinColumn; import javax.persistence.JoinTable; @@ -61,9 +65,56 @@ public class DataFile extends DvObject { @Column( nullable = false ) private String fileSystemName; - - @Column( nullable = false ) - private String md5; + + /** + * End users 
will see "SHA-1" (with a hyphen) rather than "SHA1" in the GUI + * and API but in the "datafile" table we persist "SHA1" (no hyphen) for + * type safety (using keys of the enum). In the "setting" table, we persist + * "SHA-1" (with a hyphen) to match the GUI and the "Algorithm Name" list at + * https://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#MessageDigest + * + * The list of types should be limited to the list above in the technote + * because the string gets passed into MessageDigest.getInstance() and you + * can't just pass in any old string. + */ + public enum ChecksumType { + + MD5("MD5"), + SHA1("SHA-1"); + + private final String text; + + private ChecksumType(final String text) { + this.text = text; + } + + public static ChecksumType fromString(String text) { + if (text != null) { + for (ChecksumType checksumType : ChecksumType.values()) { + if (text.equals(checksumType.text)) { + return checksumType; + } + } + } + throw new IllegalArgumentException("ChecksumType must be one of these values: " + Arrays.asList(ChecksumType.values()) + "."); + } + + @Override + public String toString() { + return text; + } + } + + @Column(nullable = false) + @Enumerated(EnumType.STRING) + private ChecksumType checksumType; + + /** + * Examples include "f622da34d54bdc8ee541d6916ac1c16f" as an MD5 value or + * "3a484dfdb1b429c2e15eb2a735f1f5e4d5b04ec6" as a SHA-1 value + */ + @Column(nullable = false) + private String checksumValue; @Column(nullable=true) private Long filesize; // Number of bytes in file. 
Allows 0 and null, negative numbers not permitted @@ -364,15 +415,26 @@ public void setRestricted(boolean restricted) { this.restricted = restricted; } + public ChecksumType getChecksumType() { + return checksumType; + } - public String getmd5() { - return this.md5; + public void setChecksumType(ChecksumType checksumType) { + this.checksumType = checksumType; } - - public void setmd5(String md5) { - this.md5 = md5; + + public String getChecksumValue() { + return this.checksumValue; } - + + public void setChecksumValue(String checksumValue) { + this.checksumValue = checksumValue; + } + + public String getOriginalChecksumType() { + return BundleUtil.getStringFromBundle("file.originalChecksumType", Arrays.asList(this.checksumType.toString()) ); + } + public DataFileIO getAccessObject() throws IOException { DataFileIO dataAccess = DataAccess.createDataAccessObject(this); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index c2ecce1543e..c41b2aa1caf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -255,7 +255,7 @@ public DataFile findCheapAndEasy(Long id) { Object[] result = null; try { - result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t1.FILESYSTEMNAME, t1.FILESIZE, t1.INGESTSTATUS, t1.MD5, t1.RESTRICTED, t3.ID, t3.AUTHORITY, t3.IDENTIFIER FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult(); + result = (Object[]) em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, 
t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t0.PREVIEWIMAGEAVAILABLE, t1.CONTENTTYPE, t1.FILESYSTEMNAME, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t3.ID, t3.AUTHORITY, t3.IDENTIFIER, t1.CHECKSUMTYPE FROM DVOBJECT t0, DATAFILE t1, DVOBJECT t2, DATASET t3 WHERE ((t0.ID = " + id + ") AND (t0.OWNER_ID = t2.ID) AND (t2.ID = t3.ID) AND (t1.ID = t0.ID))").getSingleResult(); } catch (Exception ex) { return null; } @@ -346,14 +346,14 @@ public DataFile findCheapAndEasy(Long id) { String md5 = (String) result[14]; if (md5 != null) { - dataFile.setmd5(md5); + dataFile.setChecksumValue(md5); } Boolean restricted = (Boolean) result[15]; if (restricted != null) { dataFile.setRestricted(restricted); } - + Dataset owner = new Dataset(); @@ -362,6 +362,17 @@ public DataFile findCheapAndEasy(Long id) { owner.setId((Long)result[16]); owner.setAuthority((String)result[17]); owner.setIdentifier((String)result[18]); + + String checksumType = (String) result[19]; + if (checksumType != null) { + try { + // In the database we store "SHA1" rather than "SHA-1". 
+ DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); + dataFile.setChecksumType(typeFromStringInDatabase); + } catch (IllegalArgumentException ex) { + logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); + } + } dataFile.setOwner(owner); @@ -465,7 +476,7 @@ public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion i = 0; - List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t1.FILESYSTEMNAME, t1.FILESIZE, t1.INGESTSTATUS, t1.MD5, t1.RESTRICTED FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile')))").getResultList(); + List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t1.FILESYSTEMNAME, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t1.CHECKSUMTYPE FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile')))").getResultList(); for (Object[] result : fileResults) { Integer file_id = (Integer) result[0]; @@ -544,13 +555,24 @@ public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion String md5 = (String) result[13]; if (md5 != null) { - dataFile.setmd5(md5); + dataFile.setChecksumValue(md5); } Boolean restricted = (Boolean) result[14]; if (restricted != null) { dataFile.setRestricted(restricted); } + + String checksumType = (String) result[15]; + if (checksumType != null) { + try { + // In the database we store "SHA1" rather than "SHA-1". 
+ DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); + dataFile.setChecksumType(typeFromStringInDatabase); + } catch (IllegalArgumentException ex) { + logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); + } + } // TODO: // - if ingest status is "bad", look up the ingest report; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 882608dff45..e744263f0f6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2837,7 +2837,7 @@ public String cancel() { } public boolean isDuplicate(FileMetadata fileMetadata) { - String thisMd5 = fileMetadata.getDataFile().getmd5(); + String thisMd5 = fileMetadata.getDataFile().getChecksumValue(); if (thisMd5 == null) { return false; } @@ -2854,7 +2854,7 @@ public boolean isDuplicate(FileMetadata fileMetadata) { Iterator fmIt = workingVersion.getFileMetadatas().iterator(); while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); - String md5 = fm.getDataFile().getmd5(); + String md5 = fm.getDataFile().getChecksumValue(); if (md5 != null) { if (MD5Map.get(md5) != null) { MD5Map.put(md5, MD5Map.get(md5).intValue() + 1); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java index 5fe3c9cd61e..09cc4ad50b2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java @@ -711,7 +711,8 @@ public int compare(FileMetadata l1, FileMetadata l2) { if (fileMetadataIsDifferent(fm1, fm2)) { datasetFileDifferenceItem fdi = selectFileMetadataDiffs(fm1, fm2); fdi.setFileId(fm1.getDataFile().getId().toString()); - fdi.setFileMD5(fm1.getDataFile().getmd5()); + fdi.setFileChecksumType(fm1.getDataFile().getChecksumType()); + 
fdi.setFileChecksumValue(fm1.getDataFile().getChecksumValue()); datasetFilesDiffList.add(fdi); } i++; @@ -719,14 +720,16 @@ public int compare(FileMetadata l1, FileMetadata l2) { } else if (fm2.getDataFile().getId() != null && fm1.getDataFile().getId().compareTo(fm2.getDataFile().getId()) > 0) { datasetFileDifferenceItem fdi = selectFileMetadataDiffs(null, fm2); fdi.setFileId(fm2.getDataFile().getId().toString()); - fdi.setFileMD5(fm2.getDataFile().getmd5()); + fdi.setFileChecksumType(fm2.getDataFile().getChecksumType()); + fdi.setFileChecksumValue(fm2.getDataFile().getChecksumValue()); datasetFilesDiffList.add(fdi); j++; } else if (fm2.getDataFile().getId() == null || fm1.getDataFile().getId().compareTo(fm2.getDataFile().getId()) < 0) { datasetFileDifferenceItem fdi = selectFileMetadataDiffs(fm1, null); fdi.setFileId(fm1.getDataFile().getId().toString()); - fdi.setFileMD5(fm1.getDataFile().getmd5()); + fdi.setFileChecksumType(fm1.getDataFile().getChecksumType()); + fdi.setFileChecksumValue(fm1.getDataFile().getChecksumValue()); datasetFilesDiffList.add(fdi); i++; @@ -740,7 +743,8 @@ public int compare(FileMetadata l1, FileMetadata l2) { fm1 = originalVersion.getFileMetadatas().get(i); datasetFileDifferenceItem fdi = selectFileMetadataDiffs(fm1, null); fdi.setFileId(fm1.getDataFile().getId().toString()); - fdi.setFileMD5(fm1.getDataFile().getmd5()); + fdi.setFileChecksumType(fm1.getDataFile().getChecksumType()); + fdi.setFileChecksumValue(fm1.getDataFile().getChecksumValue()); datasetFilesDiffList.add(fdi); i++; @@ -754,10 +758,18 @@ public int compare(FileMetadata l1, FileMetadata l2) { } else { fdi.setFileId("[UNASSIGNED]"); } - if (fm2.getDataFile().getmd5() != null) { - fdi.setFileMD5(fm2.getDataFile().getmd5()); + if (fm2.getDataFile().getChecksumValue() != null) { + fdi.setFileChecksumType(fm2.getDataFile().getChecksumType()); + fdi.setFileChecksumValue(fm2.getDataFile().getChecksumValue()); } else { - fdi.setFileMD5("[UNASSIGNED]"); + /** + * @todo What 
should we do here? checksumValue is set to + "nullable = false" so it should never be null. Let's set + it to "null" and see if this code path is ever reached. If + not, the null check above can probably be safely removed. + */ + fdi.setFileChecksumType(null); + fdi.setFileChecksumValue("[UNASSIGNED]"); } datasetFilesDiffList.add(fdi); @@ -993,7 +1005,8 @@ public datasetFileDifferenceItem() { } private String fileId; - private String fileMD5; + private DataFile.ChecksumType fileChecksumType; + private String fileChecksumValue; private String fileName1; private String fileType1; @@ -1131,13 +1144,21 @@ public void setFile1Empty(boolean state) { public void setFile2Empty(boolean state) { file2Empty = state; } - - public String getFileMD5() { - return fileMD5; + + public DataFile.ChecksumType getFileChecksumType() { + return fileChecksumType; + } + + public void setFileChecksumType(DataFile.ChecksumType fileChecksumType) { + this.fileChecksumType = fileChecksumType; + } + + public String getFileChecksumValue() { + return fileChecksumValue; } - public void setFileMD5(String fileMD5) { - this.fileMD5 = fileMD5; + public void setFileChecksumValue(String fileChecksumValue) { + this.fileChecksumValue = fileChecksumValue; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index a86374185e8..edbaa9516b5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -1030,7 +1030,7 @@ public String cancel() { } public boolean isDuplicate(FileMetadata fileMetadata) { - String thisMd5 = fileMetadata.getDataFile().getmd5(); + String thisMd5 = fileMetadata.getDataFile().getChecksumValue(); if (thisMd5 == null) { return false; } @@ -1052,7 +1052,7 @@ public boolean isDuplicate(FileMetadata fileMetadata) { while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); - String md5 = 
fm.getDataFile().getmd5(); + String md5 = fm.getDataFile().getChecksumValue(); if (md5 != null) { if (MD5Map.get(md5) != null) { MD5Map.put(md5, MD5Map.get(md5).intValue() + 1); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 2c0322e719c..e73a3a53e13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -180,7 +180,10 @@ public Response createDataset( String jsonBody, @PathParam("identifier") String } try { try { - DatasetVersion version = jsonParser().parseDatasetVersion(jsonVersion); + DatasetVersion version = new DatasetVersion(); + version.setDataset(ds); + // Use the two argument version so that the version knows which dataset it's associated with. + version = jsonParser().parseDatasetVersion(jsonVersion, version); // force "initial version" properties version.setMinorVersionNumber(null); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/WorldMapRelatedData.java b/src/main/java/edu/harvard/iq/dataverse/api/WorldMapRelatedData.java index 616d5c6ccce..58787fa5423 100755 --- a/src/main/java/edu/harvard/iq/dataverse/api/WorldMapRelatedData.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/WorldMapRelatedData.java @@ -494,7 +494,7 @@ public Response getWorldMapDatafileInfo(String jsonTokenData, @Context HttpServl jsonData.add("datafile_id", dfile.getId()); jsonData.add("datafile_label", dfile_meta.getLabel()); //jsonData.add("filename", dfile_meta.getLabel()); - jsonData.add("datafile_expected_md5_checksum", dfile.getmd5()); + jsonData.add("datafile_expected_md5_checksum", dfile.getChecksumValue()); Long fsize = dfile.getFilesize(); if (fsize == null){ fsize= new Long(-1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 77dbeebdf22..8f5c0fc99c8 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -62,7 +62,6 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReaderSpi; import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MD5Checksum; import edu.harvard.iq.dataverse.util.ShapefileHandler; import edu.harvard.iq.dataverse.util.SumStatCalculator; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -600,11 +599,12 @@ public List createDataFiles(DatasetVersion version, InputStream inputS if (!tempFile.toFile().renameTo(new File(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier()))) { return null; } - - // MD5: - MD5Checksum md5Checksum = new MD5Checksum(); + try { - datafile.setmd5(md5Checksum.CalculateMD5(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier())); + // We persist "SHA1" rather than "SHA-1". + datafile.setChecksumType(systemConfig.getFileFixityChecksumAlgorithm()); + FileUtil fileUtil = new FileUtil(); + datafile.setChecksumValue(fileUtil.CalculateCheckSum(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(), datafile.getChecksumType())); } catch (Exception md5ex) { logger.warning("Could not calculate MD5 signature for new file " + fileName); } @@ -853,12 +853,17 @@ private DataFile createSingleDataFile(DatasetVersion version, InputStream inputS outputStream.close(); } catch (IOException ioex) {} } - - // MD5: + + /** + * @todo Can this block and the similar block above be refactored + * into a common code path? + */ if (datafile != null) { - MD5Checksum md5Checksum = new MD5Checksum(); + FileUtil fileUtil = new FileUtil(); try { - datafile.setmd5(md5Checksum.CalculateMD5(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier())); + // We persist "SHA1" rather than "SHA-1". 
+ datafile.setChecksumType(systemConfig.getFileFixityChecksumAlgorithm()); + datafile.setChecksumValue(fileUtil.CalculateCheckSum(getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(), datafile.getChecksumType())); } catch (Exception md5ex) { logger.warning("Could not calculate MD5 signature for new file " + fileName); } diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index 3744a90241a..e5ffce5ea88 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -1,7 +1,5 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. +/** + * @todo Shouldn't this be in the "edu.harvard.iq.dataverse.api" package? It is the only one that isn't. */ package edu.harvard.iq.dataverse.mydata; @@ -267,7 +265,11 @@ private String getJSONErrorString(String jsonMsg, String optionalLoggerMsg){ } - + + /** + * @todo This should support the "X-Dataverse-key" header like the other + * APIs. 
+ */ @Path(retrieveDataPartialAPIPath) @GET @Produces({"application/json"}) diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 5da94dc34df..648bebaa4c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -950,7 +950,16 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) { datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile())); datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile())); datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize()); - datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getmd5()); + if (DataFile.ChecksumType.MD5.equals(fileMetadata.getDataFile().getChecksumType())) { + /** + * @todo Someday we should probably deprecate this + * FILE_MD5 in favor of a combination of + * FILE_CHECKSUM_TYPE and FILE_CHECKSUM_VALUE. 
+ */ + datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getChecksumValue()); + } + datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_TYPE, fileMetadata.getDataFile().getChecksumType().toString()); + datafileSolrInputDocument.addField(SearchFields.FILE_CHECKSUM_VALUE, fileMetadata.getDataFile().getChecksumValue()); datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription()); datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription()); datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf()); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 871b28e7858..0dddfd3b22e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -147,6 +147,8 @@ public class SearchFields { public static final String FILE_TYPE = "fileTypeGroupFacet"; public static final String FILE_SIZE_IN_BYTES = "fileSizeInBytes"; public static final String FILE_MD5 = "fileMd5"; + public static final String FILE_CHECKSUM_TYPE = "fileChecksumType"; + public static final String FILE_CHECKSUM_VALUE = "fileChecksumValue"; public static final String FILENAME_WITHOUT_EXTENSION = "fileNameWithoutExtension"; /** * Indexed as a string so we can facet on it. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 8eab46a3556..28803e8d1bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -1043,13 +1043,15 @@ public String dataFileSizeDisplay(DataFile datafile) { } - public String dataFileMD5Display(DataFile datafile) { + public String dataFileChecksumDisplay(DataFile datafile) { if (datafile == null) { return ""; } - if (datafile.getmd5() != null && datafile.getmd5() != "") { - return " MD5: " + datafile.getmd5() + " "; + if (datafile.getChecksumValue() != null && datafile.getChecksumValue() != "") { + if (datafile.getChecksumType() != null) { + return " " + datafile.getChecksumType() + ": " + datafile.getChecksumValue() + " "; + } } return ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 54c06214383..bd2fbcc75a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; @@ -527,6 +528,12 @@ public SolrQueryResponse search(User user, Dataverse dataverse, String query, Li } } solrSearchResult.setFileMd5((String) solrDocument.getFieldValue(SearchFields.FILE_MD5)); + try { + solrSearchResult.setFileChecksumType((DataFile.ChecksumType) DataFile.ChecksumType.fromString((String) solrDocument.getFieldValue(SearchFields.FILE_CHECKSUM_TYPE))); + } catch (IllegalArgumentException ex) { + logger.info("Exception setting setFileChecksumType: 
" + ex); + } + solrSearchResult.setFileChecksumValue((String) solrDocument.getFieldValue(SearchFields.FILE_CHECKSUM_VALUE)); solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF)); solrSearchResult.setDatasetVersionId(datasetVersionId); List fileCategories = (ArrayList) solrDocument.getFieldValues(SearchFields.FILE_TAG); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index fa738f0ea6d..bb084c3c69f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -4,6 +4,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.util.ArrayList; import java.util.Date; @@ -15,6 +16,8 @@ import javax.json.JsonObject; import javax.json.JsonObjectBuilder; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; +import java.math.BigDecimal; +import javax.json.JsonValue; public class SolrSearchResult { @@ -67,7 +70,12 @@ public class SolrSearchResult { private String filetype; private String fileContentType; private Long fileSizeInBytes; + /** + * fileMD5 is here for legacy and backward-compatibility reasons. 
It might be deprecated some day in favor of "fileChecksumType" and "fileChecksumValue" + */ private String fileMd5; + private DataFile.ChecksumType fileChecksumType; + private String fileChecksumValue; private String dataverseAlias; private String dataverseParentAlias; // private boolean statePublished; @@ -494,7 +502,13 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool .add("file_type", this.filetype) .add("file_content_type", this.fileContentType) .add("size_in_bytes", getFileSizeInBytes()) + /** + * "md5" was the only possible value so it's hard-coded here but + * we might want to deprecate it someday since we now put the + * MD5 or SHA-1 in "checksum". + */ .add("md5", getFileMd5()) + .add("checksum", JsonPrinter.getChecksumTypeAndValue(getFileChecksumType(), getFileChecksumValue())) .add("unf", getUnf()) .add("dataset_citation", datasetCitation) .add("deaccession_reason", this.deaccessionReason) @@ -807,13 +821,33 @@ public void setFileSizeInBytes(Long fileSizeInBytes) { } public String getFileMd5() { - return fileMd5; + if (DataFile.ChecksumType.MD5.equals(getFileChecksumType())) { + return fileMd5; + } else { + return null; + } } public void setFileMd5(String fileMd5) { this.fileMd5 = fileMd5; } + public DataFile.ChecksumType getFileChecksumType() { + return fileChecksumType; + } + + public void setFileChecksumType(DataFile.ChecksumType fileChecksumType) { + this.fileChecksumType = fileChecksumType; + } + + public String getFileChecksumValue() { + return fileChecksumValue; + } + + public void setFileChecksumValue(String fileChecksumValue) { + this.fileChecksumValue = fileChecksumValue; + } + public String getNameSort() { return nameSort; } @@ -1020,4 +1054,5 @@ public void setUserRole(List userRole) { this.userRole = userRole; } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 65c3278eb8b..e6b6c827760 
100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -33,6 +33,7 @@ public class SettingsServiceBean { * So there. */ public enum Key { + FileFixityChecksumAlgorithm, /** * Override Solr highlighting "fragsize" * https://wiki.apache.org/solr/HighlightingParameters#hl.fragsize diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 3addc9513fa..5e6766a3dcf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -21,10 +21,12 @@ package edu.harvard.iq.dataverse.util; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.ingest.IngestableDataChecker; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; @@ -33,6 +35,8 @@ import java.util.MissingResourceException; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.HashMap; import java.util.Map; import java.util.logging.Logger; @@ -403,4 +407,52 @@ public static String getFriendlySize(Long filesize) { return displaySize; } + + // from MD5Checksum.java + public synchronized String CalculateCheckSum(String datafile, ChecksumType checksumType) { + + FileInputStream fis = null; + try { + fis = new FileInputStream(datafile); + } catch (FileNotFoundException ex) { + throw new RuntimeException(ex); + } + + return CalculateChecksum(fis, checksumType); + } + + // from MD5Checksum.java + public synchronized String CalculateChecksum(InputStream in, ChecksumType checksumType) { + MessageDigest 
md = null; + try { + // Use "SHA-1" (toString) rather than "SHA1", for example. + md = MessageDigest.getInstance(checksumType.toString()); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + + byte[] dataBytes = new byte[1024]; + + int nread; + try { + while ((nread = in.read(dataBytes)) != -1) { + md.update(dataBytes, 0, nread); + } + } catch (IOException ex) { + throw new RuntimeException(ex); + } finally { + try { + in.close(); + } catch (Exception e) { + } + } + + byte[] mdbytes = md.digest(); + StringBuilder sb = new StringBuilder(""); + for (int i = 0; i < mdbytes.length; i++) { + sb.append(Integer.toString((mdbytes[i] & 0xff) + 0x100, 16).substring(1)); + } + return sb.toString(); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MD5Checksum.java b/src/main/java/edu/harvard/iq/dataverse/util/MD5Checksum.java deleted file mode 100644 index 2b4d8755743..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/util/MD5Checksum.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.util; - -import java.io.InputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; - -/** - * (from v.3.6) - * @author xyang - */ -public class MD5Checksum implements java.io.Serializable { - - public MD5Checksum() { - } - - public synchronized String CalculateMD5 (String datafile) { - - FileInputStream fis = null; - try { - fis = new FileInputStream(datafile); - } catch (FileNotFoundException ex) { - throw new RuntimeException(ex); - } - - return CalculateMD5(fis); - /* - byte[] dataBytes = new byte[1024]; - - int nread; - try { - while ((nread = fis.read(dataBytes)) != -1) { - md.update(dataBytes, 0, nread); - } - } catch (IOException ex) { - throw new RuntimeException(ex); - } - - byte[] mdbytes = md.digest(); - StringBuilder sb = new StringBuilder(""); - for (int i = 0; i < mdbytes.length; i++) { - sb.append(Integer.toString((mdbytes[i] & 0xff) + 0x100, 16).substring(1)); - } - return sb.toString(); - */ - } - - public synchronized String CalculateMD5 (InputStream in) { - MessageDigest md = null; - try { - md = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e); - } - - byte[] dataBytes = new byte[1024]; - - int nread; - try { - while ((nread = in.read(dataBytes)) != -1) { - md.update(dataBytes, 0, nread); - } - } catch (IOException ex) { - throw new RuntimeException(ex); - } finally { - try {in.close();} catch (Exception e) {} - } - - byte[] mdbytes = md.digest(); - StringBuilder sb = new StringBuilder(""); - for (int i = 0; i < mdbytes.length; i++) { - sb.append(Integer.toString((mdbytes[i] & 0xff) + 0x100, 16).substring(1)); - } - return sb.toString(); - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index f03ae885e61..30bd8c1250a 
100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.util; import com.ocpsoft.pretty.PrettyContext; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.FileInputStream; import java.io.IOException; @@ -509,4 +510,16 @@ public boolean isTimerServer() { } return false; } + + public DataFile.ChecksumType getFileFixityChecksumAlgorithm() { + DataFile.ChecksumType saneDefault = DataFile.ChecksumType.MD5; + String checksumStringFromDatabase = settingsService.getValueForKey(SettingsServiceBean.Key.FileFixityChecksumAlgorithm, saneDefault.toString()); + try { + DataFile.ChecksumType checksumTypeFromDatabase = DataFile.ChecksumType.fromString(checksumStringFromDatabase); + return checksumTypeFromDatabase; + } catch (IllegalArgumentException ex) { + logger.info("The setting " + SettingsServiceBean.Key.FileFixityChecksumAlgorithm + " is misconfigured. 
" + ex.getMessage() + " Returning sane default: " + saneDefault + "."); + return saneDefault; + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 3e8a6152558..2fe6b7f36ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -35,6 +35,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; @@ -49,6 +50,8 @@ */ public class JsonParser { + private static final Logger logger = Logger.getLogger(JsonParser.class.getCanonicalName()); + DatasetFieldServiceBean datasetFieldSvc; MetadataBlockServiceBean blockService; SettingsServiceBean settingsService; @@ -373,18 +376,44 @@ public DataFile parseDataFile(JsonObject datafileJson) { contentType = "application/octet-stream"; } String storageIdentifier = datafileJson.getString("storageIdentifier"); - String md5 = datafileJson.getString("md5", null); - - if (md5 == null) { - md5 = "unknown"; + JsonObject checksum = datafileJson.getJsonObject("checksum"); + if (checksum != null) { + // newer style that allows for SHA-1 rather than MD5 + /** + * @todo Add more error checking. Do we really expect people to set + * file metadata without uploading files? Some day we'd like to work + * on a "native" API that allows for multipart upload of the JSON + * describing the files (this "parseDataFile" method) and the bits + * of the files themselves. 
See + * https://github.com/IQSS/dataverse/issues/1612 + */ + String type = checksum.getString("type"); + if (type != null) { + String value = checksum.getString("value"); + if (value != null) { + try { + dataFile.setChecksumType(DataFile.ChecksumType.fromString(type)); + dataFile.setChecksumValue(value); + } catch (IllegalArgumentException ex) { + logger.info("Invalid"); + } + } + } + } else { + // older, MD5 logic, still her for backward compatibility + String md5 = datafileJson.getString("md5", null); + if (md5 == null) { + md5 = "unknown"; + } + dataFile.setChecksumType(DataFile.ChecksumType.MD5); + dataFile.setChecksumValue(md5); } - + // TODO: // unf (if available)... etc.? dataFile.setContentType(contentType); dataFile.setStorageIdentifier(storageIdentifier); - dataFile.setmd5(md5); return dataFile; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index f00bee0b140..cf6e04283d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -29,6 +29,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; import edu.harvard.iq.dataverse.util.StringUtil; import java.util.Set; @@ -468,7 +469,12 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { .add("originalFileFormat", df.getOriginalFileFormat()) .add("originalFormatLabel", df.getOriginalFormatLabel()) .add("UNF", df.getUnf()) - .add("md5", df.getmd5()) + /** + * @todo Should we deprecate "md5" now that it's under + * "checksum" (which may also be a SHA-1 rather than an MD5)? 
+ */ + .add("md5", getMd5IfItExists(df.getChecksumType(), df.getChecksumValue())) + .add("checksum", getChecksumTypeAndValue(df.getChecksumType(), df.getChecksumValue())) .add("description", df.getDescription()); } @@ -589,4 +595,23 @@ public static JsonArrayBuilder json(Collection jc) { } return bld; } + + public static String getMd5IfItExists(DataFile.ChecksumType checksumType, String checksumValue) { + if (DataFile.ChecksumType.MD5.equals(checksumType)) { + return checksumValue; + } else { + return null; + } + } + + public static JsonObjectBuilder getChecksumTypeAndValue(DataFile.ChecksumType checksumType, String checksumValue) { + if (checksumType != null) { + return Json.createObjectBuilder() + .add("type", checksumType.toString()) + .add("value", checksumValue); + } else { + return null; + } + } + } diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 22079d4576c..4abaeedb8a1 100755 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -825,7 +825,7 @@

- +
diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index 8650b50c5e4..872900a7cd6 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -149,16 +149,16 @@ - +
-
- - +
+ + -
#{bundle['file.MD5.exists.tip']}
+
#{bundle['file.checksum.exists.tip']}
diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index ee3d4f71af4..da8fbc3f48b 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -194,8 +194,9 @@
- + +
@@ -236,12 +237,13 @@
-
+
- +
@@ -304,8 +306,9 @@ FITS stuff....
  • + #{bundle['file.metadataTab.fileMetadata.md5.label']} - +
  • diff --git a/src/main/webapp/filesFragment.xhtml b/src/main/webapp/filesFragment.xhtml index 0a87c89198f..1af77330d03 100644 --- a/src/main/webapp/filesFragment.xhtml +++ b/src/main/webapp/filesFragment.xhtml @@ -264,7 +264,7 @@
    - +
    diff --git a/src/main/webapp/mydata_templates/cards.html b/src/main/webapp/mydata_templates/cards.html index 1d12409e7cd..39e9cba22a3 100644 --- a/src/main/webapp/mydata_templates/cards.html +++ b/src/main/webapp/mydata_templates/cards.html @@ -75,6 +75,7 @@ {{ sdoc.date_to_display_on_card }} - {{ sdoc.parentName }} {#
    {{ sdoc.dataset_citation|safe }}#} +
    {{ sdoc.file_type }} - {{ sdoc.size_in_bytes }} KB - MD5: {{ sdoc.md5 }} {% endif %} diff --git a/src/main/webapp/mydata_templates/cards_minimum.html b/src/main/webapp/mydata_templates/cards_minimum.html index 6fa4066d48b..c0dd50db7f1 100644 --- a/src/main/webapp/mydata_templates/cards_minimum.html +++ b/src/main/webapp/mydata_templates/cards_minimum.html @@ -83,7 +83,7 @@
    {{ card_info.file_type }} - {{ card_info.size_in_bytes }} KB - - MD5: {{ card_info.md5 }} + {{ card_info.checksum.type }}: {{ card_info.checksum.value }} {% endif %}
    diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 22336165006..40e689c9869 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -608,7 +608,7 @@ - +